diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 9944a9a92ab1f..e602f03de5ebf 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -29,6 +29,7 @@ #include #include #include +#include #include namespace llvm { @@ -529,69 +530,88 @@ class MCCFIInstruction { OpGnuArgsSize, OpLabel, OpValOffset, + OpLLVMRegisterPair, + OpLLVMVectorRegisters, + OpLLVMVectorOffset, + OpLLVMVectorRegisterMask, + }; + + // Held in ExtraFields for most common OpTypes, exceptions follow. + struct CommonFields { + unsigned Register = std::numeric_limits::max(); + int64_t Offset = 0; + unsigned Register2 = std::numeric_limits::max(); + unsigned AddressSpace = 0; + }; + // Held in ExtraFields when OpEscape. + struct EscapeFields { + std::vector Values; + std::string Comment; + }; + // Held in ExtraFields when OpLabel. + struct LabelFields { + MCSymbol *CfiLabel = nullptr; + }; + /// Held in ExtraFields when OpLLVMRegisterPair. + struct RegisterPairFields { + unsigned Register; + unsigned Reg1, Reg2; + unsigned Reg1SizeInBits, Reg2SizeInBits; + }; + struct VectorRegisterWithLane { + unsigned Register; + unsigned Lane; + unsigned SizeInBits; + }; + /// Held in ExtraFields when OpLLVMVectorRegisters. + struct VectorRegistersFields { + unsigned Register; + std::vector VectorRegisters; + }; + /// Held in ExtraFields when OpLLVMVectorOffset. + struct VectorOffsetFields { + unsigned Register; + unsigned RegisterSizeInBits; + int64_t Offset; + unsigned MaskRegister; + unsigned MaskRegisterSizeInBits; + }; + /// Held in ExtraFields when OpLLVMVectorRegisterMask. 
+ struct VectorRegisterMaskFields { + unsigned Register; + unsigned SpillRegister; + unsigned SpillRegisterLaneSizeInBits; + unsigned MaskRegister; + unsigned MaskRegisterSizeInBits; }; private: MCSymbol *Label; - union { - struct { - unsigned Register; - int64_t Offset; - } RI; - struct { - unsigned Register; - int64_t Offset; - unsigned AddressSpace; - } RIA; - struct { - unsigned Register; - unsigned Register2; - } RR; - MCSymbol *CfiLabel; - } U; + std::variant + ExtraFields; OpType Operation; SMLoc Loc; - std::vector Values; - std::string Comment; - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, SMLoc Loc, - StringRef V = "", StringRef Comment = "") - : Label(L), Operation(Op), Loc(Loc), Values(V.begin(), V.end()), - Comment(Comment) { - assert(Op != OpRegister && Op != OpLLVMDefAspaceCfa); - U.RI = {R, O}; - } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2, SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpRegister); - U.RR = {R1, R2}; - } - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, unsigned AS, - SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpLLVMDefAspaceCfa); - U.RIA = {R, O, AS}; - } - - MCCFIInstruction(OpType Op, MCSymbol *L, MCSymbol *CfiLabel, SMLoc Loc) - : Label(L), Operation(Op), Loc(Loc) { - assert(Op == OpLabel); - U.CfiLabel = CfiLabel; - } + template + MCCFIInstruction(OpType Op, MCSymbol *L, FieldsType &&EF, SMLoc Loc) + : Label(L), ExtraFields(std::forward(EF)), Operation(Op), + Loc(Loc) {} public: /// .cfi_def_cfa defines a rule for computing CFA as: take address from /// Register and add Offset to it. static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfa, L, Register, Offset, Loc); + return {OpDefCfa, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_def_cfa_register modifies a rule for computing CFA. From now /// on Register will be used instead of the old one. 
Offset remains the same. static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfaRegister, L, Register, INT64_C(0), Loc); + return {OpDefCfaRegister, L, CommonFields{Register}, Loc}; } /// .cfi_def_cfa_offset modifies a rule for computing CFA. Register @@ -599,7 +619,7 @@ class MCCFIInstruction { /// that will be added to a defined register to the compute CFA address. static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpDefCfaOffset, L, 0, Offset, Loc); + return {OpDefCfaOffset, L, CommonFields{0, Offset}, Loc}; } /// .cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but @@ -607,7 +627,7 @@ class MCCFIInstruction { /// offset. static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc = {}) { - return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, Loc); + return {OpAdjustCfaOffset, L, CommonFields{0, Adjustment}, Loc}; } // FIXME: Update the remaining docs to use the new proposal wording. @@ -618,15 +638,15 @@ class MCCFIInstruction { int64_t Offset, unsigned AddressSpace, SMLoc Loc) { - return MCCFIInstruction(OpLLVMDefAspaceCfa, L, Register, Offset, - AddressSpace, Loc); + return {OpLLVMDefAspaceCfa, L, + CommonFields{Register, Offset, 0, AddressSpace}, Loc}; } /// .cfi_offset Previous value of Register is saved at offset Offset /// from CFA. static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpOffset, L, Register, Offset, Loc); + return {OpOffset, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_rel_offset Previous value of Register is saved at offset @@ -634,30 +654,30 @@ class MCCFIInstruction { /// using the known displacement of the CFA register from the CFA. 
static MCCFIInstruction createRelOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpRelOffset, L, Register, Offset, Loc); + return {OpRelOffset, L, CommonFields{Register, Offset}, Loc}; } /// .cfi_register Previous value of Register1 is saved in /// register Register2. static MCCFIInstruction createRegister(MCSymbol *L, unsigned Register1, unsigned Register2, SMLoc Loc = {}) { - return MCCFIInstruction(OpRegister, L, Register1, Register2, Loc); + return {OpRegister, L, CommonFields{Register1, 0, Register2}, Loc}; } /// .cfi_window_save SPARC register window is saved. static MCCFIInstruction createWindowSave(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpWindowSave, L, 0, INT64_C(0), Loc); + return {OpWindowSave, L, CommonFields{}, Loc}; } /// .cfi_negate_ra_state AArch64 negate RA state. static MCCFIInstruction createNegateRAState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc); + return {OpNegateRAState, L, CommonFields{}, Loc}; } /// .cfi_negate_ra_state_with_pc AArch64 negate RA state with PC. static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc); + return {OpNegateRAStateWithPC, L, CommonFields{}, Loc}; } /// .cfi_restore says that the rule for Register is now the same as it @@ -665,104 +685,157 @@ class MCCFIInstruction { /// by .cfi_startproc were executed. static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpRestore, L, Register, INT64_C(0), Loc); + return {OpRestore, L, CommonFields{Register}, Loc}; } /// .cfi_undefined From now on the previous value of Register can't be /// restored anymore. 
static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpUndefined, L, Register, INT64_C(0), Loc); + return {OpUndefined, L, CommonFields{Register}, Loc}; } /// .cfi_same_value Current value of Register is the same as in the /// previous frame. I.e., no restoration is needed. static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc = {}) { - return MCCFIInstruction(OpSameValue, L, Register, INT64_C(0), Loc); + return {OpSameValue, L, CommonFields{Register}, Loc}; } /// .cfi_remember_state Save all current rules for all registers. static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpRememberState, L, 0, INT64_C(0), Loc); + return {OpRememberState, L, CommonFields{}, Loc}; } /// .cfi_restore_state Restore the previously saved state. static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc = {}) { - return MCCFIInstruction(OpRestoreState, L, 0, INT64_C(0), Loc); + return {OpRestoreState, L, CommonFields{}, Loc}; } /// .cfi_escape Allows the user to add arbitrary bytes to the unwind /// info. static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc = {}, StringRef Comment = "") { - return MCCFIInstruction(OpEscape, L, 0, 0, Loc, Vals, Comment); + return {OpEscape, L, + EscapeFields{std::vector(Vals.begin(), Vals.end()), + Comment.str()}, + Loc}; } /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc = {}) { - return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, Loc); + return {OpGnuArgsSize, L, CommonFields{0, Size}, Loc}; } static MCCFIInstruction createLabel(MCSymbol *L, MCSymbol *CfiLabel, SMLoc Loc) { - return MCCFIInstruction(OpLabel, L, CfiLabel, Loc); + return {OpLabel, L, LabelFields{CfiLabel}, Loc}; } + /// .cfi_llvm_register_pair Previous value of Register is saved in R1:R2. 
+ static MCCFIInstruction + createLLVMRegisterPair(MCSymbol *L, unsigned Register, unsigned R1, + unsigned R1SizeInBits, unsigned R2, + unsigned R2SizeInBits, SMLoc Loc = {}) { + RegisterPairFields Extra{Register, R1, R2, R1SizeInBits, R2SizeInBits}; + return {OpLLVMRegisterPair, L, Extra, Loc}; + } + + /// .cfi_llvm_vector_registers Previous value of Register is saved in lanes of + /// vector registers. + static MCCFIInstruction + createLLVMVectorRegisters(MCSymbol *L, unsigned Register, + std::vector VectorRegisters, + SMLoc Loc = {}) { + VectorRegistersFields Extra{Register, std::move(VectorRegisters)}; + return {OpLLVMVectorRegisters, L, std::move(Extra), Loc}; + } + + /// .cfi_llvm_vector_offset Previous value of Register is saved at Offset from + /// CFA. MaskRegister specifies the active lanes of register. + static MCCFIInstruction + createLLVMVectorOffset(MCSymbol *L, unsigned Register, + unsigned RegisterSizeInBits, unsigned MaskRegister, + unsigned MaskRegisterSizeInBits, int64_t Offset, + SMLoc Loc = {}) { + VectorOffsetFields Extra{Register, RegisterSizeInBits, Offset, MaskRegister, + MaskRegisterSizeInBits}; + return MCCFIInstruction(OpLLVMVectorOffset, L, Extra, Loc); + } + + /// .cfi_llvm_vector_register_mask Previous value of Register is saved in + /// SpillRegister, predicated on the value of MaskRegister. 
+ static MCCFIInstruction createLLVMVectorRegisterMask( + MCSymbol *L, unsigned Register, unsigned SpillRegister, + unsigned SpillRegisterLaneSizeInBits, unsigned MaskRegister, + unsigned MaskRegisterSizeInBits, SMLoc Loc = {}) { + VectorRegisterMaskFields Extra{ + Register, SpillRegister, SpillRegisterLaneSizeInBits, + MaskRegister, MaskRegisterSizeInBits, + }; + return MCCFIInstruction(OpLLVMVectorRegisterMask, L, Extra, Loc); + } + + template ExtraFieldsTy &getExtraFields() { + return std::get(ExtraFields); + } + + template const ExtraFieldsTy &getExtraFields() const { + return std::get(ExtraFields); + } /// .cfi_val_offset Previous value of Register is offset Offset from the /// current CFA register. static MCCFIInstruction createValOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc = {}) { - return MCCFIInstruction(OpValOffset, L, Register, Offset, Loc); + return {OpValOffset, L, CommonFields{Register, Offset}, Loc}; } OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } unsigned getRegister() const { - if (Operation == OpRegister) - return U.RR.Register; - if (Operation == OpLLVMDefAspaceCfa) - return U.RIA.Register; assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRestore || Operation == OpUndefined || Operation == OpSameValue || Operation == OpDefCfaRegister || - Operation == OpRelOffset || Operation == OpValOffset); - return U.RI.Register; + Operation == OpRelOffset || Operation == OpValOffset || + Operation == OpRegister || Operation == OpLLVMDefAspaceCfa); + return std::get(ExtraFields).Register; } unsigned getRegister2() const { assert(Operation == OpRegister); - return U.RR.Register2; + return std::get(ExtraFields).Register2; } unsigned getAddressSpace() const { assert(Operation == OpLLVMDefAspaceCfa); - return U.RIA.AddressSpace; + return std::get(ExtraFields).AddressSpace; } int64_t getOffset() const { - if (Operation == OpLLVMDefAspaceCfa) - return U.RIA.Offset; 
assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize || - Operation == OpValOffset); - return U.RI.Offset; + Operation == OpValOffset || Operation == OpLLVMDefAspaceCfa); + return std::get(ExtraFields).Offset; } MCSymbol *getCfiLabel() const { assert(Operation == OpLabel); - return U.CfiLabel; + return std::get(ExtraFields).CfiLabel; } StringRef getValues() const { assert(Operation == OpEscape); + auto &Values = std::get(ExtraFields).Values; return StringRef(&Values[0], Values.size()); } - StringRef getComment() const { return Comment; } + StringRef getComment() const { + assert(Operation == OpEscape); + return std::get(ExtraFields).Comment; + } SMLoc getLoc() const { return Loc; } }; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 79c715e3820a6..4e76aa323eb30 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1012,6 +1012,24 @@ class LLVM_ABI MCStreamer { SMLoc Loc = {}); virtual void emitCFIWindowSave(SMLoc Loc = {}); virtual void emitCFINegateRAState(SMLoc Loc = {}); + virtual void emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1SizeInBits, int64_t R2, + int64_t R2SizeInBits, SMLoc Loc = {}); + virtual void emitCFILLVMVectorRegisters( + int64_t Register, + std::vector VRs, + SMLoc Loc = {}); + virtual void emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + int64_t Offset, SMLoc Loc = {}); + virtual void + emitCFILLVMVectorRegisterMask(int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc = {}); + virtual void emitCFINegateRAStateWithPC(SMLoc Loc = {}); virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name); virtual void emitCFIValOffset(int64_t Register, int64_t 
Offset, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 2a146eb15f709..895c18abc56f9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -260,6 +260,39 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpRestoreState: OutStreamer->emitCFIRestoreState(Loc); break; + case MCCFIInstruction::OpLLVMRegisterPair: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMRegisterPair(Fields.Register, Fields.Reg1, + Fields.Reg1SizeInBits, Fields.Reg2, + Fields.Reg2SizeInBits, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorRegisters: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorRegisters(Fields.Register, + Fields.VectorRegisters, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorOffset( + Fields.Register, Fields.RegisterSizeInBits, Fields.MaskRegister, + Fields.MaskRegisterSizeInBits, Fields.Offset, Loc); + break; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + const auto &Fields = + Inst.getExtraFields(); + OutStreamer->emitCFILLVMVectorRegisterMask( + Fields.Register, Fields.SpillRegister, + Fields.SpillRegisterLaneSizeInBits, Fields.MaskRegister, + Fields.MaskRegisterSizeInBits, Loc); + break; + } + case MCCFIInstruction::OpValOffset: OutStreamer->emitCFIValOffset(Inst.getRegister(), Inst.getOffset(), Loc); break; diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 14098bc821617..0d60d17da0cf7 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -262,6 +262,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpNegateRAState: case MCCFIInstruction::OpNegateRAStateWithPC: case
MCCFIInstruction::OpGnuArgsSize: + case MCCFIInstruction::OpLLVMRegisterPair: + case MCCFIInstruction::OpLLVMVectorRegisters: + case MCCFIInstruction::OpLLVMVectorOffset: + case MCCFIInstruction::OpLLVMVectorRegisterMask: case MCCFIInstruction::OpLabel: case MCCFIInstruction::OpValOffset: break; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 8b72c295416a2..8ed590669a3b0 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -240,6 +240,11 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("window_save", MIToken::kw_cfi_window_save) .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + .Case("llvm_register_pair", MIToken::kw_cfi_llvm_register_pair) + .Case("llvm_vector_registers", MIToken::kw_cfi_llvm_vector_registers) + .Case("llvm_vector_offset", MIToken::kw_cfi_llvm_vector_offset) + .Case("llvm_vector_register_mask", + MIToken::kw_cfi_llvm_vector_register_mask) .Case("negate_ra_sign_state_with_pc", MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc) .Case("blockaddress", MIToken::kw_blockaddress) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 0627f176b9e00..abac1880f94e0 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -98,6 +98,10 @@ struct MIToken { kw_cfi_undefined, kw_cfi_window_save, kw_cfi_aarch64_negate_ra_sign_state, + kw_cfi_llvm_register_pair, + kw_cfi_llvm_vector_registers, + kw_cfi_llvm_vector_offset, + kw_cfi_llvm_vector_register_mask, kw_cfi_aarch64_negate_ra_sign_state_with_pc, kw_blockaddress, kw_intrinsic, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9dd6886..3618022d89bed 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -484,6 +484,7 @@ class MIParser { bool parseDILocation(MDNode *&Expr); 
bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); + bool parseCFIUnsigned(unsigned &Value); bool parseCFIRegister(unsigned &Reg); bool parseCFIAddressSpace(unsigned &AddressSpace); bool parseCFIEscapeValues(std::string& Values); @@ -2475,6 +2476,13 @@ bool MIParser::parseCFIOffset(int &Offset) { return false; } +bool MIParser::parseCFIUnsigned(unsigned &Value) { + if (getUnsigned(Value)) + return true; + lex(); + return false; +} + bool MIParser::parseCFIRegister(unsigned &Reg) { if (Token.isNot(MIToken::NamedRegister)) return error("expected a cfi register"); @@ -2608,6 +2616,69 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { case MIToken::kw_cfi_aarch64_negate_ra_sign_state: CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); break; + case MIToken::kw_cfi_llvm_register_pair: { + unsigned Reg, R1, R2; + unsigned R1Size, R2Size; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(R1) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(R1Size) || expectAndConsume(MIToken::comma) || + parseCFIRegister(R2) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(R2Size)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMRegisterPair( + nullptr, Reg, R1, R1Size, R2, R2Size)); + break; + } + case MIToken::kw_cfi_llvm_vector_registers: { + std::vector VectorRegisters; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma)) + return true; + do { + unsigned VR; + unsigned Lane, Size; + if (parseCFIRegister(VR) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(Lane) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(Size)) + return true; + VectorRegisters.push_back({VR, Lane, Size}); + } while (consumeIfPresent(MIToken::comma)); + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorRegisters( + nullptr, Reg, std::move(VectorRegisters))); + break; + } + case MIToken::kw_cfi_llvm_vector_offset: { + unsigned Reg, 
MaskReg; + unsigned RegSize, MaskRegSize; + int Offset = 0; + + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(RegSize) || expectAndConsume(MIToken::comma) || + parseCFIRegister(MaskReg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(MaskRegSize) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorOffset( + nullptr, Reg, RegSize, MaskReg, MaskRegSize, Offset)); + break; + } + case MIToken::kw_cfi_llvm_vector_register_mask: { + unsigned Reg, SpillReg, MaskReg; + unsigned SpillRegLaneSize, MaskRegSize; + + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(SpillReg) || expectAndConsume(MIToken::comma) || + parseCFIUnsigned(SpillRegLaneSize) || + expectAndConsume(MIToken::comma) || parseCFIRegister(MaskReg) || + expectAndConsume(MIToken::comma) || parseCFIUnsigned(MaskRegSize)) + return true; + + CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMVectorRegisterMask( + nullptr, Reg, SpillReg, SpillRegLaneSize, MaskReg, MaskRegSize)); + break; + } case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAStateWithPC(nullptr)); @@ -2962,6 +3033,10 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::kw_cfi_undefined: case MIToken::kw_cfi_window_save: case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + case MIToken::kw_cfi_llvm_register_pair: + case MIToken::kw_cfi_llvm_vector_registers: + case MIToken::kw_cfi_llvm_vector_offset: + case MIToken::kw_cfi_llvm_vector_register_mask: case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index bb9c76ff0c729..db802cc6e9024 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ 
b/llvm/lib/CodeGen/MachineOperand.cpp @@ -778,6 +778,64 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, if (MCSymbol *Label = CFI.getLabel()) MachineOperand::printSymbol(OS, *Label); break; + case MCCFIInstruction::OpLLVMRegisterPair: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_register_pair "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", "; + printCFIRegister(Fields.Reg1, OS, TRI); + OS << ", " << Fields.Reg1SizeInBits << ", "; + printCFIRegister(Fields.Reg2, OS, TRI); + OS << ", " << Fields.Reg2SizeInBits; + break; + } + case MCCFIInstruction::OpLLVMVectorRegisters: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_vector_registers "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + for (auto [Reg, Lane, Size] : Fields.VectorRegisters) { + OS << ", "; + printCFIRegister(Reg, OS, TRI); + OS << ", " << Lane << ", " << Size; + } + break; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_vector_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", " << Fields.RegisterSizeInBits << ", "; + printCFIRegister(Fields.MaskRegister, OS, TRI); + OS << ", " << Fields.MaskRegisterSizeInBits << ", " << Fields.Offset; + break; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + const auto &Fields = + CFI.getExtraFields(); + + OS << "llvm_vector_register_mask "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(Fields.Register, OS, TRI); + OS << ", "; + printCFIRegister(Fields.SpillRegister, OS, TRI); + OS << ", " << Fields.SpillRegisterLaneSizeInBits << ", "; + printCFIRegister(Fields.MaskRegister, OS, TRI); + OS << ", " << 
Fields.MaskRegisterSizeInBits; + break; + } case MCCFIInstruction::OpNegateRAStateWithPC: OS << "negate_ra_sign_state_with_pc "; if (MCSymbol *Label = CFI.getLabel()) diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index bca820fa807c8..4d2d2da8a4445 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -161,6 +161,16 @@ dwarf::CFIProgram DWARFCFIState::convert(MCCFIInstruction Directive) { CFIP.addInstruction(dwarf::DW_CFA_val_offset, Directive.getRegister(), Directive.getOffset()); break; + case MCCFIInstruction::OpLLVMRegisterPair: + case MCCFIInstruction::OpLLVMVectorRegisters: + case MCCFIInstruction::OpLLVMVectorOffset: + case MCCFIInstruction::OpLLVMVectorRegisterMask: + // TODO: These should be pretty straightforward to support, but it is low + // priority. Similarly the implementation of OpLLVMDefAspaceCfa above + // seems incomplete and should be fixed. + Context->reportWarning(Directive.getLoc(), + "this directive is not supported, ignoring it"); + break; } return CFIP; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index be8c022f39ad1..6c54a9efbf2c6 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -369,6 +369,21 @@ class MCAsmStreamer final : public MCStreamer { void emitCFINegateRAState(SMLoc Loc) override; void emitCFINegateRAStateWithPC(SMLoc Loc) override; void emitCFIReturnColumn(int64_t Register) override; + void emitCFILLVMRegisterPair(int64_t Register, int64_t R1, int64_t R1Size, + int64_t R2, int64_t R2Size, SMLoc Loc) override; + void emitCFILLVMVectorRegisters( + int64_t Register, + std::vector VRs, + SMLoc Loc) override; + void emitCFILLVMVectorOffset(int64_t Register, int64_t RegisterSize, + int64_t MaskRegister, int64_t MaskRegisterSize, + int64_t Offset, SMLoc Loc) override; + void emitCFILLVMVectorRegisterMask(int64_t Register, int64_t SpillRegister, + int64_t
SpillRegisterLaneSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + SMLoc Loc) override; + void emitCFILabelDirective(SMLoc Loc, StringRef Name) override; void emitCFIValOffset(int64_t Register, int64_t Offset, SMLoc Loc) override; @@ -2101,6 +2116,67 @@ void MCAsmStreamer::emitCFIRegister(int64_t Register1, int64_t Register2, EmitEOL(); } +void MCAsmStreamer::emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1Size, int64_t R2, + int64_t R2Size, SMLoc Loc) { + MCStreamer::emitCFILLVMRegisterPair(Register, R1, R1Size, R2, R2Size, Loc); + + OS << "\t.cfi_llvm_register_pair "; + EmitRegisterName(Register); + OS << ", "; + EmitRegisterName(R1); + OS << ", " << R1Size << ", "; + EmitRegisterName(R2); + OS << ", " << R2Size; + EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorRegisters( + int64_t Register, std::vector VRs, + SMLoc Loc) { + MCStreamer::emitCFILLVMVectorRegisters(Register, VRs, Loc); + + OS << "\t.cfi_llvm_vector_registers "; + EmitRegisterName(Register); + for (auto [Reg, Lane, Size] : VRs) + OS << ", " << Reg << ", " << Lane << ", " << Size; + EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSize, + int64_t MaskRegister, + int64_t MaskRegisterSize, + int64_t Offset, SMLoc Loc) { + MCStreamer::emitCFILLVMVectorOffset(Register, RegisterSize, MaskRegister, + MaskRegisterSize, Offset, Loc); + + OS << "\t.cfi_llvm_vector_offset "; + EmitRegisterName(Register); + OS << ", " << RegisterSize << ", "; + EmitRegisterName(MaskRegister); + OS << ", " << MaskRegisterSize << ", " << Offset; + EmitEOL(); +} + +void MCAsmStreamer::emitCFILLVMVectorRegisterMask( + int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc) { + MCStreamer::emitCFILLVMVectorRegisterMask( + Register, SpillRegister, SpillRegisterLaneSizeInBits, MaskRegister, + MaskRegisterSizeInBits, Loc); + + OS << 
"\t.cfi_llvm_vector_register_mask "; + EmitRegisterName(Register); + OS << ", "; + EmitRegisterName(SpillRegister); + OS << ", " << SpillRegisterLaneSizeInBits << ", "; + EmitRegisterName(MaskRegister); + OS << ", " << MaskRegisterSizeInBits; + EmitEOL(); +} + void MCAsmStreamer::emitCFIWindowSave(SMLoc Loc) { MCStreamer::emitCFIWindowSave(Loc); OS << "\t.cfi_window_save"; diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index e8f000a584839..09a93dd34ece3 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1377,6 +1377,16 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) { Streamer.emitInt8(Encoding); } +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + assert(DwarfReg >= 0); + if (DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { int dataAlignmentFactor = getDataAlignmentFactor(Streamer); auto *MRI = Streamer.getContext().getRegisterInfo(); @@ -1521,6 +1531,7 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpEscape: Streamer.emitBytes(Instr.getValues()); return; + case MCCFIInstruction::OpLabel: Streamer.emitLabel(Instr.getCfiLabel(), Instr.getLoc()); return; @@ -1543,7 +1554,182 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { } return; } + case MCCFIInstruction::OpLLVMRegisterPair: { + // CFI for a register spilled to a pair of SGPRs is implemented as an + // expression(E) rule where E is a composite location description with + // multiple parts each referencing SGPR register location storage with a bit + // offset of 0. 
In other words we generate the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_piece ) + // (DW_OP_regx ) (DW_OP_piece ) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaluation to be the location description on the top of the stack (i.e. + // the implicitly pushed one is just ignored.) + + const auto &Fields = + Instr.getExtraFields(); + + SmallString<10> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Reg1, OSBlock); + if (Fields.Reg1SizeInBits % 8 == 0) { + OSBlock << uint8_t(dwarf::DW_OP_piece); + encodeULEB128(Fields.Reg1SizeInBits / 8, OSBlock); + } else { + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(Fields.Reg1SizeInBits, OSBlock); + encodeULEB128(0, OSBlock); + } + encodeDwarfRegisterLocation(Fields.Reg2, OSBlock); + if (Fields.Reg2SizeInBits % 8 == 0) { + OSBlock << uint8_t(dwarf::DW_OP_piece); + encodeULEB128(Fields.Reg2SizeInBits / 8, OSBlock); + } else { + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(Fields.Reg2SizeInBits, OSBlock); + encodeULEB128(0, OSBlock); + } + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; } + case MCCFIInstruction::OpLLVMVectorRegisters: { + // CFI for an SGPR spilled to multiple lanes of VGPRs is implemented as an + // expression(E) rule where E is a composite location description with + // multiple parts each referencing VGPR register location storage with a bit + // offset of the lane index multiplied by the size of a lane.
In other words + // we generate the following DWARF: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_bit_piece , *) + // (DW_OP_regx ) (DW_OP_bit_piece , *) + // ... + // (DW_OP_regx ) (DW_OP_bit_piece , *) + // + // However if we're only using a single lane then we can emit a slightly + // more optimal form: + // + // DW_CFA_expression: , + // (DW_OP_regx ) (DW_OP_LLVM_offset_uconst *) + // + // The memory location description for the current CFA is pushed on the + // stack before E is evaluated, but we choose not to drop it as it would + // require a longer expression E and DWARF defines the result of the + // evaluation to be the location description on the top of the stack (i.e. + // the implicitly pushed one is just ignored.) + + const auto &Fields = + Instr.getExtraFields(); + auto &VRs = Fields.VectorRegisters; + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + + if (VRs.size() == 1 && VRs[0].SizeInBits % 8 == 0) { + encodeDwarfRegisterLocation(VRs[0].Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + encodeULEB128((VRs[0].SizeInBits / 8) * VRs[0].Lane, OSBlock); + } else { + for (const auto &VR : VRs) { + // TODO: Detect when we can merge multiple adjacent pieces, or even + // reduce this to a register location description (when all pieces are + // adjacent). + encodeDwarfRegisterLocation(VR.Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_bit_piece); + encodeULEB128(VR.SizeInBits, OSBlock); + encodeULEB128(VR.SizeInBits * VR.Lane, OSBlock); + } + } + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + case MCCFIInstruction::OpLLVMVectorOffset: { + // CFI for a vector register spilled to memory is implemented as an + // expression(E) rule where E is a location description.
+ // + // DW_CFA_expression: , + // (DW_OP_regx ) + // (DW_OP_swap) + // (DW_OP_LLVM_offset_uconst ) + // (DW_OP_LLVM_call_frame_entry_reg ) + // (DW_OP_deref_size ) + // (DW_OP_LLVM_select_bit_piece ) + + const auto &Fields = + Instr.getExtraFields(); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Register, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_swap); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_offset_uconst); + encodeULEB128(Fields.Offset, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_call_frame_entry_reg); + encodeULEB128(Fields.MaskRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size); + OSBlock << uint8_t(Fields.MaskRegisterSizeInBits / 8); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_select_bit_piece); + encodeULEB128(Fields.RegisterSizeInBits, OSBlock); + encodeULEB128(Fields.MaskRegisterSizeInBits, OSBlock); + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + case MCCFIInstruction::OpLLVMVectorRegisterMask: { + // CFI for a VGPR/AGPR partially spilled to another VGPR/AGPR dependent on + // an EXEC mask is implemented as an expression(E) rule where E is a + // location description. 
+ // + // DW_CFA_expression: , + // (DW_OP_regx ) + // (DW_OP_regx ) + // (DW_OP_LLVM_call_frame_entry_reg ) + // (DW_OP_deref_size ) + // (DW_OP_LLVM_select_bit_piece ) + + const auto Fields = + Instr.getExtraFields(); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(Fields.Register, OSBlock); + encodeDwarfRegisterLocation(Fields.SpillRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_call_frame_entry_reg); + encodeULEB128(Fields.MaskRegister, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size) + << uint8_t(Fields.MaskRegisterSizeInBits / 8); + OSBlock << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_select_bit_piece); + encodeULEB128(Fields.SpillRegisterLaneSizeInBits, OSBlock); + encodeULEB128(Fields.MaskRegisterSizeInBits, OSBlock); + + Streamer.emitInt8(dwarf::DW_CFA_expression); + Streamer.emitULEB128IntValue(Fields.Register); + Streamer.emitULEB128IntValue(Block.size()); + Streamer.emitBinaryData(StringRef(&Block[0], Block.size())); + return; + } + } + llvm_unreachable("Unhandled case in switch"); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index acea3ab23680a..0a3f22083a51d 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -493,6 +493,10 @@ class AsmParser : public MCAsmParser { DK_CFI_LLVM_DEF_ASPACE_CFA, DK_CFI_OFFSET, DK_CFI_REL_OFFSET, + DK_CFI_LLVM_REGISTER_PAIR, + DK_CFI_LLVM_VECTOR_REGISTERS, + DK_CFI_LLVM_VECTOR_OFFSET, + DK_CFI_LLVM_VECTOR_REGISTER_MASK, DK_CFI_PERSONALITY, DK_CFI_LSDA, DK_CFI_REMEMBER_STATE, @@ -610,6 +614,10 @@ class AsmParser : public MCAsmParser { bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc); bool parseDirectiveCFISignalFrame(SMLoc DirectiveLoc); bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMRegisterPair(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMVectorRegisters(SMLoc DirectiveLoc); + 
bool parseDirectiveCFILLVMVectorOffset(SMLoc DirectiveLoc); + bool parseDirectiveCFILLVMVectorRegisterMask(SMLoc DirectiveLoc); bool parseDirectiveCFILabel(SMLoc DirectiveLoc); bool parseDirectiveCFIValOffset(SMLoc DirectiveLoc); @@ -2116,6 +2124,14 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveCFIOffset(IDLoc); case DK_CFI_REL_OFFSET: return parseDirectiveCFIRelOffset(IDLoc); + case DK_CFI_LLVM_REGISTER_PAIR: + return parseDirectiveCFILLVMRegisterPair(IDLoc); + case DK_CFI_LLVM_VECTOR_REGISTERS: + return parseDirectiveCFILLVMVectorRegisters(IDLoc); + case DK_CFI_LLVM_VECTOR_OFFSET: + return parseDirectiveCFILLVMVectorOffset(IDLoc); + case DK_CFI_LLVM_VECTOR_REGISTER_MASK: + return parseDirectiveCFILLVMVectorRegisterMask(IDLoc); case DK_CFI_PERSONALITY: return parseDirectiveCFIPersonalityOrLsda(true); case DK_CFI_LSDA: @@ -4410,6 +4426,91 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveCFILLVMRegisterPair +/// ::= .cfi_llvm_register_pair reg, r1, r1size, r2, r2size +bool AsmParser::parseDirectiveCFILLVMRegisterPair(SMLoc DirectiveLoc) { + int64_t Register = 0; + int64_t R1 = 0, R2 = 0; + int64_t R1Size = 0, R2Size = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseRegisterOrRegisterNumber(R1, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(R1Size) || parseComma() || + parseRegisterOrRegisterNumber(R2, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(R2Size) || parseEOL()) + return true; + + getStreamer().emitCFILLVMRegisterPair(Register, R1, R1Size, R2, R2Size, + DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorRegisters +/// ::= .cfi_llvm_vector_registers reg, vreg0, vlane0, vreg0size, +bool AsmParser::parseDirectiveCFILLVMVectorRegisters(SMLoc DirectiveLoc) { + int64_t Register = 0; + std::vector VRs; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma()) + return true; 
+ + do { + int64_t VectorRegister = 0; + int64_t Lane = 0; + int64_t Size = 0; + if (parseRegisterOrRegisterNumber(VectorRegister, DirectiveLoc) || + parseComma() || parseIntToken(Lane, "expected a lane number") || + parseComma() || parseAbsoluteExpression(Size)) + return true; + VRs.push_back({unsigned(VectorRegister), unsigned(Lane), unsigned(Size)}); + } while (parseOptionalToken(AsmToken::Comma)); + + if (parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorRegisters(Register, std::move(VRs), + DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorOffset +/// ::= .cfi_llvm_vector_offset register, register-size, mask, mask-size, offset +bool AsmParser::parseDirectiveCFILLVMVectorOffset(SMLoc DirectiveLoc) { + int64_t Register = 0, MaskRegister = 0; + int64_t RegisterSize = 0, MaskRegisterSize = 0; + int64_t Offset = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(RegisterSize) || parseComma() || + parseRegisterOrRegisterNumber(MaskRegister, DirectiveLoc) || + parseComma() || parseAbsoluteExpression(MaskRegisterSize) || + parseComma() || parseAbsoluteExpression(Offset) || parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorOffset(Register, RegisterSize, MaskRegister, + MaskRegisterSize, Offset, DirectiveLoc); + return false; +} + +/// parseDirectiveCFILLVMVectorRegisterMask +/// ::= .cfi_llvm_vector_register_mask register, spill-reg, spill-reg-lane-size, +/// mask-reg, mask-reg-size +bool AsmParser::parseDirectiveCFILLVMVectorRegisterMask(SMLoc DirectiveLoc) { + int64_t Register = 0, SpillReg = 0, MaskReg = 0; + int64_t SpillRegLaneSize = 0, MaskRegSize = 0; + + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseComma() || + parseRegisterOrRegisterNumber(SpillReg, DirectiveLoc) || parseComma() || + parseAbsoluteExpression(SpillRegLaneSize) || parseComma() || + parseRegisterOrRegisterNumber(MaskReg, DirectiveLoc) || parseComma() || +
parseAbsoluteExpression(MaskRegSize) || parseEOL()) + return true; + + getStreamer().emitCFILLVMVectorRegisterMask( + Register, SpillReg, SpillRegLaneSize, MaskReg, MaskRegSize, DirectiveLoc); + return false; +} + /// parseDirectiveCFILabel /// ::= .cfi_label label bool AsmParser::parseDirectiveCFILabel(SMLoc Loc) { @@ -5444,6 +5545,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cfi_llvm_def_aspace_cfa"] = DK_CFI_LLVM_DEF_ASPACE_CFA; DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; + DirectiveKindMap[".cfi_llvm_register_pair"] = DK_CFI_LLVM_REGISTER_PAIR; + DirectiveKindMap[".cfi_llvm_vector_registers"] = DK_CFI_LLVM_VECTOR_REGISTERS; + DirectiveKindMap[".cfi_llvm_vector_offset"] = DK_CFI_LLVM_VECTOR_OFFSET; + DirectiveKindMap[".cfi_llvm_vector_register_mask"] = + DK_CFI_LLVM_VECTOR_REGISTER_MASK; DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index d4901d95e565a..55b1bb8c22689 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -5275,6 +5275,10 @@ void MasmParser::initializeDirectiveKindMap() { // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER; // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; + // DirectiveKindMap[".cfi_llvm_register_pair"] = DK_CFI_LLVM_REGISTER_PAIR; + // DirectiveKindMap[".cfi_llvm_vector_registers"] = + // DK_CFI_LLVM_VECTOR_REGISTERS; + // DirectiveKindMap[".cfi_llvm_vector_offset"] = DK_CFI_LLVM_VECTOR_OFFSET; // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; diff --git a/llvm/lib/MC/MCStreamer.cpp 
b/llvm/lib/MC/MCStreamer.cpp index bc7398120096e..27a87a6281340 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -630,6 +630,60 @@ void MCStreamer::emitCFIGnuArgsSize(int64_t Size, SMLoc Loc) { CurFrame->Instructions.push_back(std::move(Instruction)); } +void MCStreamer::emitCFILLVMRegisterPair(int64_t Register, int64_t R1, + int64_t R1Size, int64_t R2, + int64_t R2Size, SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMRegisterPair( + Label, Register, R1, R1Size, R2, R2Size, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorRegisters( + int64_t Register, std::vector VRs, + SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorRegisters( + Label, Register, std::move(VRs), Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorOffset(int64_t Register, + int64_t RegisterSizeInBits, + int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, + int64_t Offset, SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorOffset( + Label, Register, RegisterSizeInBits, MaskRegister, MaskRegisterSizeInBits, + Offset, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + +void MCStreamer::emitCFILLVMVectorRegisterMask( + int64_t Register, int64_t SpillRegister, + int64_t SpillRegisterLaneSizeInBits, int64_t MaskRegister, + int64_t MaskRegisterSizeInBits, SMLoc Loc) { + + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = MCCFIInstruction::createLLVMVectorRegisterMask( + Label, 
Register, SpillRegister, SpillRegisterLaneSizeInBits, MaskRegister, + MaskRegisterSizeInBits, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(std::move(Instruction)); +} + void MCStreamer::emitCFISignalFrame() { MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); if (!CurFrame) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 5c39f7a3d6daa..bbde3c49f64c6 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -12,9 +12,12 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/LEB128.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -27,6 +30,10 @@ static cl::opt EnableSpillVGPRToAGPR( cl::ReallyHidden, cl::init(true)); +static constexpr unsigned SGPRBitSize = 32; +static constexpr unsigned SGPRByteSize = SGPRBitSize / 8; +static constexpr unsigned VGPRLaneBitSize = 32; + // Find a register matching \p RC from \p LiveUnits which is unused and // available throughout the function. On failure, returns AMDGPU::NoRegister. // TODO: Rewrite the loop here to iterate over MCRegUnits instead of @@ -43,6 +50,81 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, return MCRegister(); } +static bool needsFrameMoves(const MachineFunction &MF) { + // FIXME: There are some places in the compiler which are sensitive to the CFI + // pseudos and so using MachineFunction::needsFrameMoves has the unintended + // effect of making enabling debug info affect codegen. 
Once we have + // identified and fixed those cases this should be replaced with + // MF.needsFrameMoves() + return true; +} + +static void encodeDwarfRegisterLocation(int DwarfReg, raw_ostream &OS) { + assert(DwarfReg >= 0); + if (DwarfReg < 32) { + OS << uint8_t(dwarf::DW_OP_reg0 + DwarfReg); + } else { + OS << uint8_t(dwarf::DW_OP_regx); + encodeULEB128(DwarfReg, OS); + } +} + +static MCCFIInstruction +createScaledCFAInPrivateWave(const GCNSubtarget &ST, + MCRegister DwarfStackPtrReg) { + assert(ST.enableFlatScratch()); + + // When flat scratch is enabled, the stack pointer is an address in the + // private_lane DWARF address space (i.e. swizzled), but in order to + // accurately and efficiently describe things like masked spills of vector + // registers we want to define the CFA to be an address in the private_wave + // DWARF address space (i.e. unswizzled). To achieve this we scale the stack + // pointer by the wavefront size, implemented as (SP << wave_size_log2). + const unsigned WavefrontSizeLog2 = ST.getWavefrontSizeLog2(); + assert(WavefrontSizeLog2 < 32); + + SmallString<20> Block; + raw_svector_ostream OSBlock(Block); + encodeDwarfRegisterLocation(DwarfStackPtrReg, OSBlock); + OSBlock << uint8_t(dwarf::DW_OP_deref_size) << uint8_t(SGPRByteSize) + << uint8_t(dwarf::DW_OP_lit0 + WavefrontSizeLog2) + << uint8_t(dwarf::DW_OP_shl) + << uint8_t(dwarf::DW_OP_lit0 + + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave) + << uint8_t(dwarf::DW_OP_LLVM_user) + << uint8_t(dwarf::DW_OP_LLVM_form_aspace_address); + + SmallString<20> CFIInst; + raw_svector_ostream OSCFIInst(CFIInst); + OSCFIInst << uint8_t(dwarf::DW_CFA_def_cfa_expression); + encodeULEB128(Block.size(), OSCFIInst); + OSCFIInst << Block; + + return MCCFIInstruction::createEscape(nullptr, OSCFIInst.str()); +} + +void SIFrameLowering::emitDefCFA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags) const { 
+ MachineFunction &MF = *MBB.getParent(); + const GCNSubtarget &ST = MF.getSubtarget(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); + + MCRegister DwarfStackPtrReg = MCRI->getDwarfRegNum(StackPtrReg, false); + MCCFIInstruction CFIInst = + ST.enableFlatScratch() + ? createScaledCFAInPrivateWave(ST, DwarfStackPtrReg) + : (AspaceAlreadyDefined + ? MCCFIInstruction::createLLVMDefAspaceCfa( + nullptr, DwarfStackPtrReg, 0, + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave, SMLoc()) + : MCCFIInstruction::createDefCfaRegister(nullptr, + DwarfStackPtrReg)); + buildCFI(MBB, MBBI, DL, CFIInst, Flags); +} + // Find a scratch register that we can use in the prologue. We avoid using // callee-save registers since they may appear to be free when this is called // from canUseAsPrologue (during shrink wrapping), but then no longer be free @@ -231,6 +313,8 @@ class PrologEpilogSGPRSpillBuilder { SIMachineFunctionInfo *FuncInfo; const SIInstrInfo *TII; const SIRegisterInfo &TRI; + const MCRegisterInfo *MCRI; + const SIFrameLowering *TFI; Register SuperReg; const PrologEpilogSGPRSaveRestoreInfo SI; LiveRegUnits &LiveUnits; @@ -239,9 +323,16 @@ class PrologEpilogSGPRSpillBuilder { ArrayRef SplitParts; unsigned NumSubRegs; unsigned EltSize = 4; + bool IsFramePtrPrologSpill; + bool NeedsFrameMoves; + + bool isExec(Register Reg) const { + return Reg == AMDGPU::EXEC_LO || Reg == AMDGPU::EXEC; + } void saveToMemory(const int FI) const { MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); assert(!MFI.isDeadObjectIndex(FI)); initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true); @@ -260,6 +351,20 @@ class PrologEpilogSGPRSpillBuilder { buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR, FI, FrameReg, DwordOff); + if (NeedsFrameMoves) { + if (isExec(SuperReg) && (I == NumSubRegs - 1)) + SubReg = AMDGPU::EXEC; + else if (IsFramePtrPrologSpill) + SubReg = FuncInfo->getFrameOffsetReg(); + + // FIXME: CFI 
for EXEC needs a fix by accurately computing the spill + // offset for both the low and high components. + if (SubReg != AMDGPU::EXEC_LO) + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(SubReg, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); + } DwordOff += 4; } } @@ -281,6 +386,19 @@ class PrologEpilogSGPRSpillBuilder { .addReg(SubReg) .addImm(Spill[I].Lane) .addReg(Spill[I].VGPR, RegState::Undef); + if (NeedsFrameMoves) { + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, AMDGPU::EXEC, Spill); + } else if (IsFramePtrPrologSpill) { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, + FuncInfo->getFrameOffsetReg(), + Spill[I].VGPR, Spill[I].Lane); + } else { + TFI->buildCFIForSGPRToVGPRSpill(MBB, MI, DL, SubReg, Spill[I].VGPR, + Spill[I].Lane); + } + } } } @@ -288,10 +406,35 @@ class PrologEpilogSGPRSpillBuilder { BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) .addReg(SuperReg) .setMIFlag(MachineInstr::FrameSetup); + if (NeedsFrameMoves) { + const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(DstReg); + ArrayRef DstSplitParts = TRI.getRegSplitParts(RC, EltSize); + unsigned DstNumSubRegs = DstSplitParts.empty() ? 1 : DstSplitParts.size(); + assert(NumSubRegs == DstNumSubRegs); + for (unsigned I = 0; I < NumSubRegs; ++I) { + Register SrcSubReg = + NumSubRegs == 1 ? SuperReg + : Register(TRI.getSubReg(SuperReg, SplitParts[I])); + Register DstSubReg = + NumSubRegs == 1 ? 
DstReg + : Register(TRI.getSubReg(DstReg, DstSplitParts[I])); + if (isExec(SuperReg)) { + if (I == NumSubRegs - 1) + TFI->buildCFIForRegToSGPRPairSpill(MBB, MI, DL, AMDGPU::EXEC, + DstReg); + } else { + TFI->buildCFI(MBB, MI, DL, + MCCFIInstruction::createRegister( + nullptr, MCRI->getDwarfRegNum(SrcSubReg, false), + MCRI->getDwarfRegNum(DstSubReg, false))); + } + } + } } void restoreFromMemory(const int FI) { MachineRegisterInfo &MRI = MF.getRegInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false); MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( @@ -343,12 +486,15 @@ class PrologEpilogSGPRSpillBuilder { MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, - LiveRegUnits &LiveUnits, Register FrameReg) + LiveRegUnits &LiveUnits, Register FrameReg, + bool IsFramePtrPrologSpill = false) : MI(MI), MBB(MBB), MF(*MBB.getParent()), ST(MF.getSubtarget()), MFI(MF.getFrameInfo()), FuncInfo(MF.getInfo()), TII(TII), TRI(TRI), - SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), - FrameReg(FrameReg) { + MCRI(MF.getContext().getRegisterInfo()), TFI(ST.getFrameLowering()), + SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL), FrameReg(FrameReg), + IsFramePtrPrologSpill(IsFramePtrPrologSpill), + NeedsFrameMoves(needsFrameMoves(MF)) { const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg); SplitParts = TRI.getRegSplitParts(RC, EltSize); NumSubRegs = SplitParts.empty() ? 
1 : SplitParts.size(); @@ -615,10 +761,39 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); assert(MFI->isEntryFunction()); + // Debug location must be unknown since the first debug location is used to + // determine the end of the prologue. + DebugLoc DL; + MachineBasicBlock::iterator I = MBB.begin(); + + if (needsFrameMoves(MF)) { + // On entry the SP/FP are not set up, so we need to define the CFA in terms + // of a literal location expression. + static const char CFAEncodedInstUserOpsArr[] = { + dwarf::DW_CFA_def_cfa_expression, + 4, // length + static_cast(dwarf::DW_OP_lit0), + static_cast(dwarf::DW_OP_lit0 + + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave), + static_cast(dwarf::DW_OP_LLVM_user), + static_cast(dwarf::DW_OP_LLVM_form_aspace_address)}; + static StringRef CFAEncodedInstUserOps = + StringRef(CFAEncodedInstUserOpsArr, sizeof(CFAEncodedInstUserOpsArr)); + buildCFI(MBB, I, DL, + MCCFIInstruction::createEscape(nullptr, CFAEncodedInstUserOps, + SMLoc(), + "CFA is 0 in private_wave aspace")); + // Unwinding halts when the return address (PC) is undefined. + buildCFI(MBB, I, DL, + MCCFIInstruction::createUndefined( + nullptr, MCRI->getDwarfRegNum(AMDGPU::PC_REG, false))); + } + Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); @@ -655,11 +830,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } } - // Debug location must be unknown since the first debug location is used to - // determine the end of the prologue. - DebugLoc DL; - MachineBasicBlock::iterator I = MBB.begin(); - // We found the SRSRC first because it needs four registers and has an // alignment requirement. 
If the SRSRC that we found is clobbering with // the scratch wave offset, which may be in a fixed SGPR or a free SGPR @@ -932,6 +1102,50 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { llvm_unreachable("Invalid TargetStackID::Value"); } +void SIFrameLowering::emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const { + const MachineFunction &MF = *MBB.getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); + Register StackPtrReg = + MF.getInfo()->getStackPtrOffsetReg(); + + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/true, + MachineInstr::FrameSetup); + + buildCFIForRegToSGPRPairSpill(MBB, MBBI, DL, AMDGPU::PC_REG, + TRI.getReturnAddressReg(MF)); + + BitVector IsCalleeSaved(TRI.getNumRegs()); + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + for (unsigned I = 0; CSRegs[I]; ++I) { + IsCalleeSaved.set(CSRegs[I]); + } + auto ProcessReg = [&](MCPhysReg Reg) { + if (IsCalleeSaved.test(Reg) || !MRI.isPhysRegModified(Reg)) + return; + MCRegister DwarfReg = MCRI->getDwarfRegNum(Reg, false); + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createUndefined(nullptr, DwarfReg)); + }; + + // Emit CFI rules for caller saved Arch VGPRs which are clobbered + unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256; + for_each(AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs), + ProcessReg); + + // Emit CFI rules for caller saved Accum VGPRs which are clobbered + if (ST.hasMAIInsts()) { + for_each(AMDGPU::AGPR_32RegClass.getRegisters(), ProcessReg); + } + + // Emit CFI rules for caller saved SGPRs which are clobbered + for_each(AMDGPU::SGPR_32RegClass.getRegisters(), ProcessReg); +} + // Activate only the inactive lanes when \p EnableInactiveLanes is true. 
// Otherwise, activate all lanes. It returns the saved exec. static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, @@ -978,14 +1192,19 @@ static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, return ScratchExecCopy; } -void SIFrameLowering::emitCSRSpillStores( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, - Register FrameReg, Register FramePtrRegScratchCopy) const { +void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc &DL, LiveRegUnits &LiveUnits, + Register FrameReg, + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const { SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + const MCRegisterInfo *MCRI = MF.getContext().getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST); @@ -1007,6 +1226,12 @@ void SIFrameLowering::emitCSRSpillStores( int FI = Reg.second; buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL, VGPR, FI, FrameReg); + if (NeedsFrameMoves) + // We spill the entire VGPR, so we can get away with just cfi_offset + buildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset( + nullptr, MCRI->getDwarfRegNum(VGPR, false), + MFI.getObjectOffset(FI) * ST.getWavefrontSize())); } }; @@ -1055,13 +1280,13 @@ void SIFrameLowering::emitCSRSpillStores( // Skip if FP is saved to a scratch SGPR, the save has already been emitted. // Otherwise, FP has been moved to a temporary register and spill it // instead. - Register Reg = - Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first; + bool IsFramePtrPrologSpill = Spill.first == FramePtrReg ? true : false; + Register Reg = IsFramePtrPrologSpill ? 
FramePtrRegScratchCopy : Spill.first; if (!Reg) continue; PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI, - LiveUnits, FrameReg); + LiveUnits, FrameReg, IsFramePtrPrologSpill); SB.save(); } @@ -1229,6 +1454,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, uint32_t NumBytes = MFI.getStackSize(); uint32_t RoundedSize = NumBytes; + const bool NeedsFrameMoves = needsFrameMoves(MF); + + if (NeedsFrameMoves) + emitPrologueEntryCFI(MBB, MBBI, DL); + if (TRI.hasStackRealignment(MF)) HasFP = true; @@ -1237,7 +1467,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // Emit the CSR spill stores with SP base register. emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FuncInfo->isChainFunction() ? Register() : StackPtrReg, - FramePtrRegScratchCopy); + FramePtrRegScratchCopy, NeedsFrameMoves); } else { // CSR spill stores will use FP as base register. Register SGPRForFPSaveRestoreCopy = @@ -1251,7 +1481,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, PrologEpilogSGPRSpillBuilder SB( FramePtrReg, FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI, - DL, TII, TRI, LiveUnits, FramePtrReg); + DL, TII, TRI, LiveUnits, FramePtrReg, + /*IsFramePtrPrologSpill*/ true); SB.save(); LiveUnits.addReg(SGPRForFPSaveRestoreCopy); } else { @@ -1298,7 +1529,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // If FP is used, emit the CSR spills with FP base register. 
if (HasFP) { emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg, - FramePtrRegScratchCopy); + FramePtrRegScratchCopy, NeedsFrameMoves); if (FramePtrRegScratchCopy) LiveUnits.removeReg(FramePtrRegScratchCopy); } @@ -1313,6 +1544,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + if (HasFP) { + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, FramePtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameSetup); + } + if (HasFP && RoundedSize != 0) { auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) .addReg(StackPtrReg) @@ -1412,6 +1649,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, FramePtrRegScratchCopy); } + const bool NeedsFrameMoves = needsFrameMoves(MF); + if (hasFP(MF)) { + if (NeedsFrameMoves) + emitDefCFA(MBB, MBBI, DL, StackPtrReg, /*AspaceAlreadyDefined=*/false, + MachineInstr::FrameDestroy); + } + if (FPSaved) { // Insert the copy to restore FP. Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy @@ -2000,17 +2244,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( return true; } +static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, + const TargetRegisterInfo *TRI) { + for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) { + if (MBB.isLiveIn(*R)) { + return true; + } + } + return false; +} + bool SIFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const GCNSubtarget &ST = MF->getSubtarget(); - if (!ST.useVGPRBlockOpsForCSR()) - return false; + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *SITRI = static_cast(TRI); + + if (!ST.useVGPRBlockOpsForCSR()) { + for (const CalleeSavedInfo &CS : CSI) { + // Insert the spill to the stack frame. 
+ unsigned Reg = CS.getReg(); + + if (CS.isSpilledToReg()) { + BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), + CS.getDstReg()) + .addReg(Reg, getKillRegState(true)); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( + Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32); + // If this value was already livein, we probably have a direct use of + // the incoming register value, so don't kill at the spill point. This + // happens since we pass some special inputs (workgroup IDs) in the + // callee saved range. + const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI); + TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(), + RC, TRI); + } + } + return true; + } MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - SIMachineFunctionInfo *MFI = MF->getInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const TargetRegisterClass *BlockRegClass = @@ -2034,10 +2310,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters( FrameInfo.getObjectAlign(FrameIndex)); BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE)) + TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE)) .addReg(Reg, getKillRegState(false)) .addFrameIndex(FrameIndex) - .addReg(MFI->getStackPtrOffsetReg()) + .addReg(FuncInfo->getStackPtrOffsetReg()) .addImm(0) .addImm(Mask) .addMemOperand(MMO); @@ -2210,3 +2486,139 @@ bool SIFrameLowering::requiresStackPointerReference( // references the SP, like variable sized stack objects. 
return frameTriviallyRequiresSP(MFI); } + +MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag) const { + MachineFunction &MF = *MBB.getParent(); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + return BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFIInst)) + .setMIFlag(flag); +} + +MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register Reg, const Register RegCopy) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + + unsigned MaskReg = MCRI.getDwarfRegNum( + ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false); + auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask( + nullptr, MCRI.getDwarfRegNum(Reg, false), + MCRI.getDwarfRegNum(RegCopy, false), VGPRLaneBitSize, MaskReg, + ST.getWavefrontSize()); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register SGPR, const Register VGPR, + const int Lane) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, false); + assert(DwarfSGPR != -1 && DwarfVGPR != -1); + assert(Lane != -1 && "Expected a lane to be present"); + + // Build a CFI instruction that represents a SGPR spilled to a single lane of + // a VGPR. 
+ MCCFIInstruction::VectorRegisterWithLane VR{unsigned(DwarfVGPR), + unsigned(Lane), VGPRLaneBitSize}; + auto CFIInst = + MCCFIInstruction::createLLVMVectorRegisters(nullptr, DwarfSGPR, {VR}); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + + int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, false); + assert(DwarfSGPR != -1); + + // Build a CFI instruction that represents a SGPR spilled to multiple lanes of + // multiple VGPRs. + + std::vector VGPRs; + for (SIRegisterInfo::SpilledReg Spill : VGPRSpills) { + int DwarfVGPR = MCRI.getDwarfRegNum(Spill.VGPR, false); + assert(DwarfVGPR != -1); + assert(Spill.hasLane() && "Expected a lane to be present"); + VGPRs.push_back( + {unsigned(DwarfVGPR), unsigned(Spill.Lane), VGPRLaneBitSize}); + } + + auto CFIInst = MCCFIInstruction::createLLVMVectorRegisters(nullptr, DwarfSGPR, + std::move(VGPRs)); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned SGPR, int64_t Offset) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + return buildCFI(MBB, MBBI, DL, + llvm::MCCFIInstruction::createOffset( + nullptr, MCRI.getDwarfRegNum(SGPR, false), Offset)); +} + +MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, int64_t Offset) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + + int DwarfVGPR 
= MCRI.getDwarfRegNum(VGPR, false); + assert(DwarfVGPR != -1); + + unsigned MaskReg = MCRI.getDwarfRegNum( + ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, false); + auto CFIInst = MCCFIInstruction::createLLVMVectorOffset( + nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg, ST.getWavefrontSize(), + Offset); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const Register Reg, const Register SGPRPair) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo(); + + int SGPR0 = TRI.getSubReg(SGPRPair, AMDGPU::sub0); + int SGPR1 = TRI.getSubReg(SGPRPair, AMDGPU::sub1); + + int DwarfReg = MCRI.getDwarfRegNum(Reg, false); + int DwarfSGPR0 = MCRI.getDwarfRegNum(SGPR0, false); + int DwarfSGPR1 = MCRI.getDwarfRegNum(SGPR1, false); + assert(DwarfReg != -1 && DwarfSGPR0 != -1 && DwarfSGPR1 != -1); + + auto CFIInst = MCCFIInstruction::createLLVMRegisterPair( + nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} + +MachineInstr * +SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg) const { + const MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo(); + int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false); + auto CFIInst = MCCFIInstruction::createSameValue(nullptr, DwarfReg); + return buildCFI(MBB, MBBI, DL, std::move(CFIInst)); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index a72772987262e..2b716db0b7a22 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++
b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -39,7 +39,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering { void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, - Register FramePtrRegScratchCopy) const; + Register FramePtrRegScratchCopy, + const bool NeedsFrameMoves) const; void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, @@ -101,9 +102,64 @@ class SIFrameLowering final : public AMDGPUFrameLowering { Register PreloadedPrivateBufferReg, Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const; + void emitPrologueEntryCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL) const; + + void emitDefCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc const &DL, Register StackPtrReg, + bool AspaceAlreadyDefined, + MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const; + public: bool requiresStackPointerReference(const MachineFunction &MF) const; + /// Create a CFI index for CFIInst and build a MachineInstr around it. + MachineInstr * + buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const; + + /// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another + /// VGPR/AGPR \p RegCopy and build a MachineInstr around it. + MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const Register Reg, + const Register RegCopy) const; + /// Create a CFI index describing a spill of an SGPR to a single lane of + /// a VGPR and build a MachineInstr around it. 
+ MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const Register SGPR, + const Register VGPR, + const int Lane) const; + /// Create a CFI index describing a spill of an SGPR to multiple lanes of + /// VGPRs and build a MachineInstr around it. + MachineInstr *buildCFIForSGPRToVGPRSpill( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register SGPR, + ArrayRef VGPRSpills) const; + /// Create a CFI index describing a spill of a SGPR to VMEM and + /// build a MachineInstr around it. + MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned SGPR, + int64_t Offset) const; + /// Create a CFI index describing a spill of a VGPR to VMEM and + /// build a MachineInstr around it. + MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned VGPR, + int64_t Offset) const; + MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg, + Register SGPRPair) const; + MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register Reg) const; // Returns true if the function may need to reserve space on the stack for the // CWSR trap handler. 
bool mayReserveScratchForCWSR(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d930a21c2d7f5..a097e721d142f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1530,22 +1530,26 @@ SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); } -static unsigned getSGPRSpillSaveOpcode(unsigned Size) { +static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_S32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE; case 8: - return AMDGPU::SI_SPILL_S64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE; case 12: - return AMDGPU::SI_SPILL_S96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE; case 16: - return AMDGPU::SI_SPILL_S128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE + : AMDGPU::SI_SPILL_S128_SAVE; case 20: - return AMDGPU::SI_SPILL_S160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE + : AMDGPU::SI_SPILL_S160_SAVE; case 24: - return AMDGPU::SI_SPILL_S192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S192_CFI_SAVE + : AMDGPU::SI_SPILL_S192_SAVE; case 28: - return AMDGPU::SI_SPILL_S224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S224_CFI_SAVE + : AMDGPU::SI_SPILL_S224_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 36: @@ -1557,69 +1561,90 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) { case 48: return AMDGPU::SI_SPILL_S384_SAVE; case 64: - return AMDGPU::SI_SPILL_S512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE + : AMDGPU::SI_SPILL_S512_SAVE; case 128: - return AMDGPU::SI_SPILL_S1024_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_S1024_CFI_SAVE + : AMDGPU::SI_SPILL_S1024_SAVE; default: llvm_unreachable("unknown register size"); } } -static unsigned getVGPRSpillSaveOpcode(unsigned Size) { +static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 2: return AMDGPU::SI_SPILL_V16_SAVE; case 4: - return AMDGPU::SI_SPILL_V32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE; case 8: - return AMDGPU::SI_SPILL_V64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE; case 12: - return AMDGPU::SI_SPILL_V96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE; case 16: - return AMDGPU::SI_SPILL_V128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE + : AMDGPU::SI_SPILL_V128_SAVE; case 20: - return AMDGPU::SI_SPILL_V160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V160_CFI_SAVE + : AMDGPU::SI_SPILL_V160_SAVE; case 24: - return AMDGPU::SI_SPILL_V192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V192_CFI_SAVE + : AMDGPU::SI_SPILL_V192_SAVE; case 28: - return AMDGPU::SI_SPILL_V224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V224_CFI_SAVE + : AMDGPU::SI_SPILL_V224_SAVE; case 32: - return AMDGPU::SI_SPILL_V256_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE + : AMDGPU::SI_SPILL_V256_SAVE; case 36: - return AMDGPU::SI_SPILL_V288_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V288_CFI_SAVE + : AMDGPU::SI_SPILL_V288_SAVE; case 40: - return AMDGPU::SI_SPILL_V320_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V320_CFI_SAVE + : AMDGPU::SI_SPILL_V320_SAVE; case 44: - return AMDGPU::SI_SPILL_V352_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V352_CFI_SAVE + : AMDGPU::SI_SPILL_V352_SAVE; case 48: - return AMDGPU::SI_SPILL_V384_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V384_CFI_SAVE + : AMDGPU::SI_SPILL_V384_SAVE; case 64: - return AMDGPU::SI_SPILL_V512_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_V512_CFI_SAVE + : AMDGPU::SI_SPILL_V512_SAVE; case 128: - return AMDGPU::SI_SPILL_V1024_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_V1024_CFI_SAVE + : AMDGPU::SI_SPILL_V1024_SAVE; default: llvm_unreachable("unknown register size"); } } -static unsigned getAVSpillSaveOpcode(unsigned Size) { +static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI) { switch (Size) { case 4: - return AMDGPU::SI_SPILL_AV32_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV32_CFI_SAVE + : AMDGPU::SI_SPILL_AV32_SAVE; case 8: - return AMDGPU::SI_SPILL_AV64_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV64_CFI_SAVE + : AMDGPU::SI_SPILL_AV64_SAVE; case 12: - return AMDGPU::SI_SPILL_AV96_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV96_CFI_SAVE + : AMDGPU::SI_SPILL_AV96_SAVE; case 16: - return AMDGPU::SI_SPILL_AV128_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV128_CFI_SAVE + : AMDGPU::SI_SPILL_AV128_SAVE; case 20: - return AMDGPU::SI_SPILL_AV160_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV160_CFI_SAVE + : AMDGPU::SI_SPILL_AV160_SAVE; case 24: - return AMDGPU::SI_SPILL_AV192_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV192_CFI_SAVE + : AMDGPU::SI_SPILL_AV192_SAVE; case 28: - return AMDGPU::SI_SPILL_AV224_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV224_CFI_SAVE + : AMDGPU::SI_SPILL_AV224_SAVE; case 32: - return AMDGPU::SI_SPILL_AV256_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV256_CFI_SAVE + : AMDGPU::SI_SPILL_AV256_SAVE; case 36: return AMDGPU::SI_SPILL_AV288_SAVE; case 40: @@ -1629,9 +1654,11 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) { case 48: return AMDGPU::SI_SPILL_AV384_SAVE; case 64: - return AMDGPU::SI_SPILL_AV512_SAVE; + return NeedsCFI ? AMDGPU::SI_SPILL_AV512_CFI_SAVE + : AMDGPU::SI_SPILL_AV512_SAVE; case 128: - return AMDGPU::SI_SPILL_AV1024_SAVE; + return NeedsCFI ? 
AMDGPU::SI_SPILL_AV1024_CFI_SAVE + : AMDGPU::SI_SPILL_AV1024_SAVE; default: llvm_unreachable("unknown register size"); } @@ -1651,7 +1678,7 @@ static unsigned getWWMRegSpillSaveOpcode(unsigned Size, unsigned SIInstrInfo::getVectorRegSpillSaveOpcode( Register Reg, const TargetRegisterClass *RC, unsigned Size, - const SIMachineFunctionInfo &MFI) const { + const SIMachineFunctionInfo &MFI, bool NeedsCFI) const { bool IsVectorSuperClass = RI.isVectorSuperClass(RC); // Choose the right opcode if spilling a WWM register. @@ -1660,16 +1687,16 @@ unsigned SIInstrInfo::getVectorRegSpillSaveOpcode( // TODO: Check if AGPRs are available if (ST.hasMAIInsts()) - return getAVSpillSaveOpcode(Size); + return getAVSpillSaveOpcode(Size, NeedsCFI); - return getVGPRSpillSaveOpcode(Size); + return getVGPRSpillSaveOpcode(Size, NeedsCFI); } -void SIInstrInfo::storeRegToStackSlot( +void SIInstrInfo::storeRegToStackSlotImpl( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, - MachineInstr::MIFlag Flags) const { + const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags, + bool NeedsCFI) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); @@ -1691,7 +1718,8 @@ void SIInstrInfo::storeRegToStackSlot( // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); + const MCInstrDesc &OpDesc = + get(getSGPRSpillSaveOpcode(SpillSize, NeedsCFI)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. @@ -1710,8 +1738,8 @@ void SIInstrInfo::storeRegToStackSlot( return; } - unsigned Opcode = - getVectorRegSpillSaveOpcode(VReg ? 
VReg : SrcReg, RC, SpillSize, *MFI); + unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC, + SpillSize, *MFI, NeedsCFI); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) @@ -1722,6 +1750,25 @@ void SIInstrInfo::storeRegToStackSlot( .addMemOperand(MMO); } +void SIInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg, + MachineInstr::MIFlag Flags) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, VReg, + Flags, false); +} + +void SIInstrInfo::storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStackSlotImpl(MBB, MI, SrcReg, isKill, FrameIndex, RC, TRI, + Register(), MachineInstr::NoFlags, true); +} + static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { case 4: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5fdeddaf3f736..9c0a80bbcecda 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -293,13 +293,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const; +private: + void storeRegToStackSlotImpl(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg, + MachineInstr::MIFlag Flags, bool NeedsCFI) const; + +public: + void storeRegToStackSlotCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, Register SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + bool getConstValDefinedInReg(const 
MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override; unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, - const SIMachineFunctionInfo &MFI) const; + const SIMachineFunctionInfo &MFI, + bool NeedsCFI) const; unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, @@ -703,6 +719,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { static bool isBlockLoadStore(uint16_t Opcode) { switch (Opcode) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: case AMDGPU::SCRATCH_STORE_BLOCK_SADDR: case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6f1feb1dc2996..3498f18467466 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1086,6 +1086,11 @@ multiclass SI_SPILL_SGPR { let mayLoad = 0; } + def _CFI_SAVE : PseudoInstSI<(outs), (ins sgpr_class:$data, i32imm:$addr)> { + let mayStore = 1; + let mayLoad = 0; + } + def _RESTORE : PseudoInstSI < (outs sgpr_class:$data), (ins i32imm:$addr)> { @@ -1159,6 +1164,19 @@ multiclass SI_SPILL_VGPR { + let mayStore = 1; + let mayLoad = 0; + // (2 * 4) + (8 * num_subregs) bytes maximum + int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), !add(UsesTmp, 3)), 8); + // Size field is unsigned char and cannot fit more. 
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252); + } + def _RESTORE : VPseudoInstSI < (outs vgpr_class:$vdata), !con( diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 40eeeb8a8630d..62386da94d854 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -100,63 +100,25 @@ INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE, char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID; -static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB, - const TargetRegisterInfo *TRI) { - for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) { - if (MBB.isLiveIn(*R)) { - return true; - } - } - return false; -} - /// Insert spill code for the callee-saved registers used in the function. -static void insertCSRSaves(MachineBasicBlock &SaveBlock, +static void insertCSRSaves(const GCNSubtarget &ST, MachineBasicBlock &SaveBlock, ArrayRef CSI, SlotIndexes *Indexes, LiveIntervals *LIS) { - MachineFunction &MF = *SaveBlock.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *RI = ST.getRegisterInfo(); - + const TargetFrameLowering *TFI = ST.getFrameLowering(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); - if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { - for (const CalleeSavedInfo &CS : CSI) { - // Insert the spill to the stack frame. - MCRegister Reg = CS.getReg(); - - MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( - Reg, Reg == RI->getReturnAddressReg(MF) ? 
MVT::i64 : MVT::i32); - - // If this value was already livein, we probably have a direct use of the - // incoming register value, so don't kill at the spill point. This happens - // since we pass some special inputs (workgroup IDs) in the callee saved - // range. - const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI); - TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), - RC, TRI, Register()); - - if (Indexes) { - assert(std::distance(MIS.begin(), I) == 1); - MachineInstr &Inst = *std::prev(I); - Indexes->insertMachineInstrInMaps(Inst); - } - - if (LIS) - LIS->removeAllRegUnitsForPhysReg(Reg); - } - } else { - // TFI doesn't update Indexes and LIS, so we have to do it separately. - if (Indexes) - Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I); - - if (LIS) - for (const CalleeSavedInfo &CS : CSI) - LIS->removeAllRegUnitsForPhysReg(CS.getReg()); - } + MachineInstrSpan MIS(I, &SaveBlock); + bool Success = TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI); + assert(Success && "spillCalleeSavedRegisters should always succeed"); + (void)Success; + + // TFI doesn't update Indexes and LIS, so we have to do it separately. + if (Indexes) + Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I); + + if (LIS) + for (const CalleeSavedInfo &CS : CSI) + LIS->removeAllRegUnitsForPhysReg(CS.getReg()); } /// Insert restore code for the callee-saved registers used in the function. 
@@ -268,11 +230,19 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( std::vector CSI; const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + Register RetAddrReg = TRI->getReturnAddressReg(MF); + bool SpillRetAddrReg = false; for (unsigned I = 0; CSRegs[I]; ++I) { MCRegister Reg = CSRegs[I]; if (SavedRegs.test(Reg)) { + if (Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub0) || + Reg == TRI->getSubReg(RetAddrReg, AMDGPU::sub1)) { + SpillRetAddrReg = true; + continue; + } + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, MVT::i32); int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), @@ -283,9 +253,21 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( } } + // Return address uses a register pair. Add the super register to the + // CSI list so that it's easier to identify the entire spill and CFI + // can be emitted appropriately. + if (SpillRetAddrReg) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(RetAddrReg, MVT::i64); + int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC), true); + CSI.push_back(CalleeSavedInfo(RetAddrReg, JunkFI)); + CalleeSavedFIs.push_back(JunkFI); + } + if (!CSI.empty()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) - insertCSRSaves(*SaveBlock, CSI, Indexes, LIS); + insertCSRSaves(ST, *SaveBlock, CSI, Indexes, LIS); // Add live ins to save blocks. 
assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ebd2e7ecf249e..77608a4cfc751 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1128,6 +1128,7 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, unsigned Op = MI.getOpcode(); switch (Op) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: // FIXME: This assumes the mask is statically known and not computed at // runtime. However, some ABIs may want to compute the mask dynamically and @@ -1135,21 +1136,29 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, return llvm::popcount( (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm()); case AMDGPU::SI_SPILL_S1024_SAVE: + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_V1024_SAVE: + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: case AMDGPU::SI_SPILL_V1024_RESTORE: case AMDGPU::SI_SPILL_A1024_SAVE: + case AMDGPU::SI_SPILL_A1024_CFI_SAVE: case AMDGPU::SI_SPILL_A1024_RESTORE: case AMDGPU::SI_SPILL_AV1024_SAVE: + case AMDGPU::SI_SPILL_AV1024_CFI_SAVE: case AMDGPU::SI_SPILL_AV1024_RESTORE: return 32; case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_V512_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: case AMDGPU::SI_SPILL_V512_RESTORE: case AMDGPU::SI_SPILL_A512_SAVE: + case AMDGPU::SI_SPILL_A512_CFI_SAVE: case AMDGPU::SI_SPILL_A512_RESTORE: case AMDGPU::SI_SPILL_AV512_SAVE: + case AMDGPU::SI_SPILL_AV512_CFI_SAVE: case AMDGPU::SI_SPILL_AV512_RESTORE: return 16; case AMDGPU::SI_SPILL_S384_SAVE: @@ -1189,75 +1198,107 @@ static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, case AMDGPU::SI_SPILL_AV288_RESTORE: return 9; case 
AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_V256_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_A256_SAVE: + case AMDGPU::SI_SPILL_A256_CFI_SAVE: case AMDGPU::SI_SPILL_A256_RESTORE: case AMDGPU::SI_SPILL_AV256_SAVE: + case AMDGPU::SI_SPILL_AV256_CFI_SAVE: case AMDGPU::SI_SPILL_AV256_RESTORE: return 8; case AMDGPU::SI_SPILL_S224_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: case AMDGPU::SI_SPILL_S224_RESTORE: case AMDGPU::SI_SPILL_V224_SAVE: + case AMDGPU::SI_SPILL_V224_CFI_SAVE: case AMDGPU::SI_SPILL_V224_RESTORE: case AMDGPU::SI_SPILL_A224_SAVE: + case AMDGPU::SI_SPILL_A224_CFI_SAVE: case AMDGPU::SI_SPILL_A224_RESTORE: case AMDGPU::SI_SPILL_AV224_SAVE: + case AMDGPU::SI_SPILL_AV224_CFI_SAVE: case AMDGPU::SI_SPILL_AV224_RESTORE: return 7; case AMDGPU::SI_SPILL_S192_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: case AMDGPU::SI_SPILL_S192_RESTORE: case AMDGPU::SI_SPILL_V192_SAVE: + case AMDGPU::SI_SPILL_V192_CFI_SAVE: case AMDGPU::SI_SPILL_V192_RESTORE: case AMDGPU::SI_SPILL_A192_SAVE: + case AMDGPU::SI_SPILL_A192_CFI_SAVE: case AMDGPU::SI_SPILL_A192_RESTORE: case AMDGPU::SI_SPILL_AV192_SAVE: + case AMDGPU::SI_SPILL_AV192_CFI_SAVE: case AMDGPU::SI_SPILL_AV192_RESTORE: return 6; case AMDGPU::SI_SPILL_S160_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_V160_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: case AMDGPU::SI_SPILL_V160_RESTORE: case AMDGPU::SI_SPILL_A160_SAVE: + case AMDGPU::SI_SPILL_A160_CFI_SAVE: case AMDGPU::SI_SPILL_A160_RESTORE: case AMDGPU::SI_SPILL_AV160_SAVE: + case AMDGPU::SI_SPILL_AV160_CFI_SAVE: case AMDGPU::SI_SPILL_AV160_RESTORE: return 5; case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_V128_SAVE: + case AMDGPU::SI_SPILL_V128_CFI_SAVE: case AMDGPU::SI_SPILL_V128_RESTORE: case 
AMDGPU::SI_SPILL_A128_SAVE: + case AMDGPU::SI_SPILL_A128_CFI_SAVE: case AMDGPU::SI_SPILL_A128_RESTORE: case AMDGPU::SI_SPILL_AV128_SAVE: + case AMDGPU::SI_SPILL_AV128_CFI_SAVE: case AMDGPU::SI_SPILL_AV128_RESTORE: return 4; case AMDGPU::SI_SPILL_S96_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_V96_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_A96_SAVE: + case AMDGPU::SI_SPILL_A96_CFI_SAVE: case AMDGPU::SI_SPILL_A96_RESTORE: case AMDGPU::SI_SPILL_AV96_SAVE: + case AMDGPU::SI_SPILL_AV96_CFI_SAVE: case AMDGPU::SI_SPILL_AV96_RESTORE: return 3; case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_V64_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_A64_SAVE: + case AMDGPU::SI_SPILL_A64_CFI_SAVE: case AMDGPU::SI_SPILL_A64_RESTORE: case AMDGPU::SI_SPILL_AV64_SAVE: + case AMDGPU::SI_SPILL_AV64_CFI_SAVE: case AMDGPU::SI_SPILL_AV64_RESTORE: return 2; case AMDGPU::SI_SPILL_S32_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: case AMDGPU::SI_SPILL_S32_RESTORE: case AMDGPU::SI_SPILL_V32_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_A32_SAVE: + case AMDGPU::SI_SPILL_A32_CFI_SAVE: case AMDGPU::SI_SPILL_A32_RESTORE: case AMDGPU::SI_SPILL_AV32_SAVE: + case AMDGPU::SI_SPILL_AV32_CFI_SAVE: case AMDGPU::SI_SPILL_AV32_RESTORE: case AMDGPU::SI_SPILL_WWM_V32_SAVE: case AMDGPU::SI_SPILL_WWM_V32_RESTORE: @@ -1386,14 +1427,14 @@ static int getOffenMUBUFLoad(unsigned Opc) { } } -static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - int Index, unsigned Lane, - unsigned ValueReg, bool IsKill) { +static MachineInstrBuilder +spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, int Index, unsigned 
Lane, + unsigned ValueReg, bool IsKill, bool NeedsCFI) { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); + const SIFrameLowering *TFL = ST.getFrameLowering(); MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); @@ -1416,6 +1457,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst) .addReg(Src, getKillRegState(IsKill)); CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); + if (NeedsCFI) + TFL->buildCFIForVRegToVRegSpill(MBB, MI, DL, Src, Dst); return CopyMIB; } unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 @@ -1424,6 +1467,8 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst) .addReg(Src, getKillRegState(IsKill)); MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); + if (NeedsCFI) + TFL->buildCFIForVRegToVRegSpill(MBB, MI, DL, Src, Dst); return MIB; } @@ -1446,7 +1491,8 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, return false; const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); - if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr()) + if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false, false) + .getInstr()) return true; MachineInstrBuilder NewMI = @@ -1511,12 +1557,13 @@ void SIRegisterInfo::buildSpillLoadStore( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, - RegScavenger *RS, LiveRegUnits *LiveUnits) const { + RegScavenger *RS, LiveRegUnits *LiveUnits, bool NeedsCFI) const { assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both"); MachineFunction *MF = MBB.getParent(); const SIInstrInfo *TII = ST.getInstrInfo(); const MachineFrameInfo &MFI = 
MF->getFrameInfo(); + const SIFrameLowering *TFL = ST.getFrameLowering(); const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const MCInstrDesc *Desc = &TII->get(LoadStoreOp); @@ -1548,6 +1595,7 @@ void SIRegisterInfo::buildSpillLoadStore( int64_t MaxOffset = Offset + Size + RemSize - EltSize; int64_t ScratchOffsetRegDelta = 0; + int64_t AdditionalCFIOffset = 0; if (IsFlat && EltSize > 4) { LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize); @@ -1660,6 +1708,7 @@ void SIRegisterInfo::buildSpillLoadStore( Scavenged = true; } + AdditionalCFIOffset = Offset; // We currently only support spilling VGPRs to EltSize boundaries, meaning // we can simplify the adjustment of Offset here to just scale with // WavefrontSize. @@ -1762,7 +1811,8 @@ void SIRegisterInfo::buildSpillLoadStore( Register Sub = IsSubReg ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane))) : ValueReg; - auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill); + auto MIB = + spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill, NeedsCFI); if (!MIB.getInstr()) break; if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) { @@ -1863,6 +1913,18 @@ void SIRegisterInfo::buildSpillLoadStore( MIB.addImm(0); // swz MIB.addMemOperand(NewMMO); + if (IsStore && NeedsCFI) { + if (TII->isBlockLoadStore(LoadStoreOp)) { + assert(RegOffset == 0 && + "expected whole register block to be treated as single element"); + buildCFIForBlockCSRStore(MBB, MI, ValueReg, Offset); + } else { + TFL->buildCFIForVGPRToVMEMSpill( + MBB, MI, DebugLoc(), SubReg, + (Offset + RegOffset) * ST.getWavefrontSize() + AdditionalCFIOffset); + } + } + if (!IsAGPR && NeedSuperRegDef) MIB.addReg(ValueReg, RegState::ImplicitDefine); @@ -1934,6 +1996,31 @@ void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, MIB.addUse(BaseVGPR + RegOffset, RegState::Implicit); } +void SIRegisterInfo::buildCFIForBlockCSRStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator 
MBBI, + Register BlockReg, + int64_t Offset) const { + const MachineFunction *MF = MBB.getParent(); + const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); + uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg); + Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0); + for (unsigned RegOffset = 0; RegOffset < 32; ++RegOffset) { + Register VGPR = BaseVGPR + RegOffset; + if (Mask & (1 << RegOffset)) { + assert(isCalleeSavedPhysReg(VGPR, *MF)); + ST.getFrameLowering()->buildCFIForVGPRToVMEMSpill( + MBB, MBBI, DebugLoc(), VGPR, + (Offset + RegOffset) * ST.getWavefrontSize()); + } else if (isCalleeSavedPhysReg(VGPR, *MF)) { + // FIXME: This is a workaround for the fact that FrameLowering's + // emitPrologueEntryCFI considers the block load to clobber all registers + // in the block. + ST.getFrameLowering()->buildCFIForSameValue(MBB, MBBI, DebugLoc(), + BaseVGPR + RegOffset); + } + } +} + void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill) const { @@ -1970,7 +2057,7 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, LiveIntervals *LIS, bool OnlyToVGPR, - bool SpillToPhysVGPRLane) const { + bool SpillToPhysVGPRLane, bool NeedsCFI) const { assert(!MI->getOperand(0).isUndef() && "undef spill should have been deleted earlier"); @@ -1983,6 +2070,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, if (OnlyToVGPR && !SpillToVGPR) return false; + const SIFrameLowering *TFL = ST.getFrameLowering(); + assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() && SB.SuperReg != SB.MFI.getFrameOffsetReg())); @@ -2015,11 +2104,27 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) .addReg(Spill.VGPR); + + MachineInstr *CFI = nullptr; + if (NeedsCFI) { 
+ if (SB.SuperReg == SB.TRI.getReturnAddressReg(SB.MF)) { + if (i == e - 1) + CFI = TFL->buildCFIForSGPRToVGPRSpill(*SB.MBB, MI, DebugLoc(), + AMDGPU::PC_REG, VGPRSpills); + } else { + CFI = TFL->buildCFIForSGPRToVGPRSpill(*SB.MBB, MI, DebugLoc(), SubReg, + Spill.VGPR, Spill.Lane); + } + } + if (Indexes) { if (IsFirstSubreg) Indexes->replaceMachineInstrInMaps(*MI, *MIB); else Indexes->insertMachineInstrInMaps(*MIB); + + if (CFI) + Indexes->insertMachineInstrInMaps(*CFI); } if (IsFirstSubreg && SB.NumSubRegs > 1) { @@ -2084,6 +2189,18 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, // Write out VGPR SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); + + // TODO: Implement CFI for SpillToVMEM for all scenarios. + MachineInstr *CFI = nullptr; + if (NeedsCFI && SB.SuperReg == SB.TRI.getReturnAddressReg(SB.MF)) { + int64_t CFIOffset = (Offset * SB.EltSize + + SB.MF.getFrameInfo().getObjectOffset(Index)) * + ST.getWavefrontSize(); + CFI = TFL->buildCFIForSGPRToVMEMSpill(*SB.MBB, MI, DebugLoc(), + AMDGPU::PC_REG, CFIOffset); + } + if (Indexes && CFI) + Indexes->insertMachineInstrInMaps(*CFI); } SB.restore(); @@ -2255,7 +2372,20 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const { + bool NeedsCFI = false; switch (MI->getOpcode()) { + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: + NeedsCFI = true; + [[fallthrough]]; case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case 
AMDGPU::SI_SPILL_S384_SAVE: @@ -2270,7 +2400,8 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: - return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); + return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane, + NeedsCFI); case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S384_RESTORE: @@ -2313,8 +2444,23 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, ? getBaseRegister() : getFrameRegister(*MF); + bool NeedsCFI = false; + switch (MI->getOpcode()) { // SGPR register spill + case AMDGPU::SI_SPILL_S1024_CFI_SAVE: + case AMDGPU::SI_SPILL_S512_CFI_SAVE: + case AMDGPU::SI_SPILL_S256_CFI_SAVE: + case AMDGPU::SI_SPILL_S224_CFI_SAVE: + case AMDGPU::SI_SPILL_S192_CFI_SAVE: + case AMDGPU::SI_SPILL_S160_CFI_SAVE: + case AMDGPU::SI_SPILL_S128_CFI_SAVE: + case AMDGPU::SI_SPILL_S96_CFI_SAVE: + case AMDGPU::SI_SPILL_S64_CFI_SAVE: + case AMDGPU::SI_SPILL_S32_CFI_SAVE: { + NeedsCFI = true; + [[fallthrough]]; + } case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S384_SAVE: @@ -2329,7 +2475,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: { - return spillSGPR(MI, Index, RS); + return spillSGPR(MI, Index, RS, nullptr, nullptr, false, false, NeedsCFI); } // SGPR register restore @@ -2351,13 +2497,40 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } // VGPR register spill - case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: { - // Put mask into M0. 
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); + case AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE: + case AMDGPU::SI_SPILL_V1024_CFI_SAVE: + case AMDGPU::SI_SPILL_V512_CFI_SAVE: + case AMDGPU::SI_SPILL_V256_CFI_SAVE: + case AMDGPU::SI_SPILL_V224_CFI_SAVE: + case AMDGPU::SI_SPILL_V192_CFI_SAVE: + case AMDGPU::SI_SPILL_V160_CFI_SAVE: + case AMDGPU::SI_SPILL_V128_CFI_SAVE: + case AMDGPU::SI_SPILL_V96_CFI_SAVE: + case AMDGPU::SI_SPILL_V64_CFI_SAVE: + case AMDGPU::SI_SPILL_V32_CFI_SAVE: + case AMDGPU::SI_SPILL_A1024_CFI_SAVE: + case AMDGPU::SI_SPILL_A512_CFI_SAVE: + case AMDGPU::SI_SPILL_A256_CFI_SAVE: + case AMDGPU::SI_SPILL_A224_CFI_SAVE: + case AMDGPU::SI_SPILL_A192_CFI_SAVE: + case AMDGPU::SI_SPILL_A160_CFI_SAVE: + case AMDGPU::SI_SPILL_A128_CFI_SAVE: + case AMDGPU::SI_SPILL_A96_CFI_SAVE: + case AMDGPU::SI_SPILL_A64_CFI_SAVE: + case AMDGPU::SI_SPILL_A32_CFI_SAVE: + case AMDGPU::SI_SPILL_AV1024_CFI_SAVE: + case AMDGPU::SI_SPILL_AV512_CFI_SAVE: + case AMDGPU::SI_SPILL_AV256_CFI_SAVE: + case AMDGPU::SI_SPILL_AV224_CFI_SAVE: + case AMDGPU::SI_SPILL_AV192_CFI_SAVE: + case AMDGPU::SI_SPILL_AV160_CFI_SAVE: + case AMDGPU::SI_SPILL_AV128_CFI_SAVE: + case AMDGPU::SI_SPILL_AV96_CFI_SAVE: + case AMDGPU::SI_SPILL_AV64_CFI_SAVE: + case AMDGPU::SI_SPILL_AV32_CFI_SAVE: + NeedsCFI = true; [[fallthrough]]; - } + case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V384_SAVE: @@ -2403,6 +2576,16 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_AV32_SAVE: case AMDGPU::SI_SPILL_WWM_V32_SAVE: case AMDGPU::SI_SPILL_WWM_AV32_SAVE: { + assert( + MI->getOpcode() != AMDGPU::SI_BLOCK_SPILL_V1024_SAVE && + "block spill does not currently support spilling non-CSR registers"); + + if (MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE) + // Put mask into M0. 
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0) + .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); + const MachineOperand *VData = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); if (VData->isUndef()) { @@ -2418,7 +2601,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16; } else { - Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE + Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -2428,12 +2611,12 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); if (IsWWMRegSpill) { TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), - RS->isRegUsed(AMDGPU::SCC)); + RS->isRegUsed(AMDGPU::SCC)); } buildSpillLoadStore( *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), - *MI->memoperands_begin(), RS); + *MI->memoperands_begin(), RS, nullptr, NeedsCFI); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII)); if (IsWWMRegSpill) TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 7b91ba7bc581f..2dae5f0eb1c69 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -121,6 +121,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const; + // Iterate over all VGPRs in the given BlockReg and emit CFI for each VGPR + // as-needed depending on the (statically known) mask, relative to the given + // base Offset. 
+ void buildCFIForBlockCSRStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register BlockReg, int64_t Offset) const; + const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; @@ -176,8 +183,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { /// free VGPR lane to spill. bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, - bool OnlyToVGPR = false, - bool SpillToPhysVGPRLane = false) const; + bool OnlyToVGPR = false, bool SpillToPhysVGPRLane = false, + bool NeedsCFI = false) const; bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, @@ -459,8 +466,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, - RegScavenger *RS, - LiveRegUnits *LiveUnits = nullptr) const; + RegScavenger *RS, LiveRegUnits *LiveUnits = nullptr, + bool NeedsCFI = false) const; // Return alignment in register file of first register in a register tuple. 
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll index b84b31cd2702c..2a5c8be7a987a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll @@ -13,20 +13,20 @@ define ptr addrspace(1) @call_assert_align() { ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, ext@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: global_store_dword v[0:1], v2, off ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 -; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index c16c8e2128c72..3e3e788b2f31d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -222,24 +222,24 @@ define void @func_caller_stack() { ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 +; MUBUF-NEXT: v_writelane_b32 
v40, s30, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 9 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; MUBUF-NEXT: v_mov_b32_e32 v0, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; MUBUF-NEXT: v_mov_b32_e32 v0, 11 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; MUBUF-NEXT: v_mov_b32_e32 v0, 12 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -257,8 +257,10 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_u32 s0, s32, 4 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 9 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 @@ -270,15 +272,13 @@ define void @func_caller_stack() { ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_add_u32 s0, s32, 16 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, 
external_void_func_v16i32_v16i32_v4i32@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -300,15 +300,15 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen -; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s4, 2 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; MUBUF-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_waitcnt vmcnt(1) ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; MUBUF-NEXT: s_waitcnt vmcnt(1) @@ -363,8 +363,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-NEXT: s_waitcnt vmcnt(1) ; MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -382,14 +382,14 @@ define void 
@func_caller_byval(ptr addrspace(5) %argptr) { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_byval@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_byval@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[1:2], s32 ; FLATSCR-NEXT: scratch_load_dwordx2 v[1:2], v0, off offset:8 @@ -414,8 +414,8 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:56 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll index 8cb9a5486a2de..b17324a38ada0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -363,7 +363,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
s_load_dword s4, s[4:5], 0x0 ; GFX9-NEXT: s_add_u32 s5, s32, 0x7ff @@ -377,6 +376,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-NEXT: s_add_u32 s32, s5, s4 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s7 +; GFX9-NEXT: s_mov_b32 s33, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -394,7 +394,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX10-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_add_u32 s5, s32, 0x3ff @@ -408,6 +407,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX10-NEXT: s_add_u32 s32, s5, s4 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: s_mov_b32 s34, s7 +; GFX10-NEXT: s_mov_b32 s33, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32: @@ -424,7 +424,6 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX11-NEXT: s_addc_u32 s1, s1, gv@gotpcrel32@hi+12 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 ; GFX11-NEXT: s_add_u32 s1, s32, 0x3ff @@ -439,6 +438,7 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX11-NEXT: s_add_u32 s32, s1, s0 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s3 +; GFX11-NEXT: s_mov_b32 s33, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %n = load i32, ptr addrspace(4) @gv %alloca = alloca i32, i32 %n, align 32, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index c295a662704e9..e058a3e5c332e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll 
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -235,17 +235,17 @@ define void @sink_null_insert_pt(ptr addrspace(4) %arg0) { ; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: global_load_dword v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s16, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: global_load_dword v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], 0 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index e86f7473363f7..c037a93af124b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -13,18 +13,24 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; GFX6-LABEL: name: system_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 
3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -33,6 +39,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: system_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -41,6 +49,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -49,6 +59,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -62,34 +74,46 @@ entry: define amdgpu_kernel void @system_one_as_release() #0 { ; GFX6-LABEL: name: system_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -101,18 +125,24 @@ entry: define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; GFX6-LABEL: name: system_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + 
; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -121,6 +151,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: system_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -129,6 +161,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -137,6 +171,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -150,18 +186,24 @@ entry: define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; GFX6-LABEL: name: system_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -170,6 +212,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -178,6 +222,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -186,6 +232,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -199,26 +247,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acquire() #0 { ; GFX6-LABEL: name: singlethread_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acquire ; GFX11CU: bb.0.entry: + 
; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acquire @@ -228,26 +288,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_release() #0 { ; GFX6-LABEL: name: singlethread_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") release @@ -257,26 +329,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acq_rel() #0 { ; GFX6-LABEL: name: 
singlethread_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acq_rel @@ -286,26 +370,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 
0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") seq_cst @@ -315,18 +411,24 @@ entry: define amdgpu_kernel void @agent_one_as_acquire() #0 { ; GFX6-LABEL: name: agent_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -335,6 +437,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -343,6 +447,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -351,6 +457,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -364,34 +472,46 @@ entry: define amdgpu_kernel void @agent_one_as_release() #0 { ; GFX6-LABEL: name: agent_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_release ; GFX8: 
bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -403,18 +523,24 @@ entry: define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; GFX6-LABEL: name: agent_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; 
GFX8-LABEL: name: agent_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -423,6 +549,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -431,6 +559,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -439,6 +569,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ 
-452,18 +584,24 @@ entry: define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; GFX6-LABEL: name: agent_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -472,6 +610,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -480,6 +620,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -488,6 +630,8 @@ define amdgpu_kernel void 
@agent_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -501,14 +645,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; GFX6-LABEL: name: workgroup_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -516,10 +666,14 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: 
BUFFER_GL0_INV implicit $exec @@ -527,6 +681,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acquire @@ -536,14 +692,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_release() #0 { ; GFX6-LABEL: name: workgroup_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -551,6 +713,8 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -558,12 +722,16 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -575,14 +743,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -591,6 +765,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -598,6 +774,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; 
GFX11WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -605,6 +783,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -616,14 +796,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -632,6 +818,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION 
undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -639,6 +827,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -646,6 +836,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -657,26 +849,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acquire() #0 { ; GFX6-LABEL: name: wavefront_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acquire @@ -686,26 +890,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_release() #0 { ; GFX6-LABEL: name: wavefront_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; 
GFX11CU-LABEL: name: wavefront_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") release @@ -715,26 +931,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acq_rel @@ -744,26 +972,38 @@ entry: define amdgpu_kernel void 
@wavefront_one_as_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") seq_cst @@ -773,18 +1013,24 @@ entry: define amdgpu_kernel void @system_acquire() #0 { ; GFX6-LABEL: name: system_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acquire ; GFX8: 
bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -793,6 +1039,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX10CU-LABEL: name: system_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -801,6 +1049,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11WGP-LABEL: name: system_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -809,6 +1059,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11CU-LABEL: name: system_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -822,34 +1074,46 @@ entry: define amdgpu_kernel void @system_release() #0 { ; GFX6-LABEL: name: 
system_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -861,18 +1125,24 @@ entry: define amdgpu_kernel void @system_acq_rel() #0 { ; GFX6-LABEL: name: system_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -881,6 +1151,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX10CU-LABEL: name: system_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -889,6 +1161,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -897,6 +1171,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -910,18 +1186,24 @@ entry: define amdgpu_kernel void @system_seq_cst() #0 { ; GFX6-LABEL: name: system_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -930,6 +1212,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -938,6 +1222,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft 
undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -946,6 +1232,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -959,26 +1247,38 @@ entry: define amdgpu_kernel void @singlethread_acquire() #0 { ; GFX6-LABEL: name: singlethread_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION 
undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acquire @@ -988,26 +1288,38 @@ entry: define amdgpu_kernel void @singlethread_release() #0 { ; GFX6-LABEL: name: singlethread_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") release @@ -1017,26 +1329,38 @@ entry: define amdgpu_kernel void @singlethread_acq_rel() #0 { ; GFX6-LABEL: name: singlethread_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: 
S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acq_rel @@ -1046,26 +1370,38 @@ entry: define amdgpu_kernel void @singlethread_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 
0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") seq_cst @@ -1075,18 +1411,24 @@ entry: define amdgpu_kernel void @agent_acquire() #0 { ; GFX6-LABEL: name: agent_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1095,6 +1437,8 @@ define amdgpu_kernel void 
@agent_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1103,6 +1447,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1111,6 +1457,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1124,34 +1472,46 @@ entry: define amdgpu_kernel void @agent_release() #0 { ; GFX6-LABEL: name: agent_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1163,18 +1523,24 @@ entry: define amdgpu_kernel void @agent_acq_rel() #0 { ; GFX6-LABEL: name: agent_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1183,6 +1549,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1191,6 +1559,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1199,6 +1569,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1212,18 +1584,24 @@ entry: define amdgpu_kernel void @agent_seq_cst() #0 { ; GFX6-LABEL: name: agent_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1232,6 +1610,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1240,6 +1620,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1248,6 +1630,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1261,16 +1645,22 @@ entry: define amdgpu_kernel void @workgroup_acquire() #0 { ; GFX6-LABEL: name: workgroup_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1278,11 +1668,15 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 49279 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1290,6 +1684,8 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 64519 ; GFX11CU-NEXT: S_ENDPGM 0 entry: @@ -1300,16 +1696,22 @@ entry: define amdgpu_kernel void @workgroup_release() #0 { ; GFX6-LABEL: name: workgroup_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1317,6 +1719,8 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1324,12 +1728,16 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1341,16 +1749,22 
@@ entry: define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1359,6 +1773,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1366,6 +1782,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: workgroup_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1373,6 +1791,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 
0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1384,16 +1804,22 @@ entry: define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1402,6 +1828,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1409,6 +1837,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: 
BUFFER_GL0_INV implicit $exec @@ -1416,6 +1846,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1427,26 +1859,38 @@ entry: define amdgpu_kernel void @wavefront_acquire() #0 { ; GFX6-LABEL: name: wavefront_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence 
syncscope("wavefront") acquire @@ -1456,26 +1900,38 @@ entry: define amdgpu_kernel void @wavefront_release() #0 { ; GFX6-LABEL: name: wavefront_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") release @@ -1485,26 +1941,38 @@ entry: define amdgpu_kernel void @wavefront_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") acq_rel @@ -1514,26 +1982,38 @@ entry: define amdgpu_kernel void @wavefront_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: 
name: wavefront_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index 21f459ac033ca..9839af011ecdb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -151,8 +151,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s7, s33 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB2_3 @@ -217,9 +217,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; GCN-NEXT: s_mov_b32 s7, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xfc0 ; GCN-NEXT: s_mov_b32 s8, s34 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_addk_i32 s32, 0x2000 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB3_2 diff --git 
a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll index 003aa049b2d1b..324d853145924 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll @@ -337,7 +337,6 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-LABEL: flat_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -354,6 +353,7 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -464,7 +464,6 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-LABEL: flat_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -481,6 +480,7 @@ define void @flat_atomic_xchg_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] @@ -4006,7 +4006,6 @@ define void 
@flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-LABEL: flat_atomic_xor_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -4023,6 +4022,7 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -4131,7 +4131,6 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-LABEL: flat_atomic_xor_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -4148,6 +4147,7 @@ define void @flat_atomic_xor_i32_ret_av_av_no_agprs(ptr %ptr) #0 { ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll index 34a4899123749..e67d5b0fad14a 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll @@ -337,7 +337,6 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr 
addrspace(1) %ptr) #0 ; GFX90A-LABEL: global_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -354,6 +353,7 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -464,7 +464,6 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-LABEL: global_atomic_xchg_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -481,6 +480,7 @@ define void @global_atomic_xchg_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] @@ -2985,7 +2985,6 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-LABEL: global_atomic_xor_i32_ret_av_av_no_agprs: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; 
GFX90A-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -3002,6 +3001,7 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def v[0:31] @@ -3110,7 +3110,6 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-LABEL: global_atomic_xor_i32_ret_av_av_no_agprs: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a3, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v42 ; Reload Reuse @@ -3127,6 +3126,7 @@ define void @global_atomic_xor_i32_ret_av_av_no_agprs(ptr addrspace(1) %ptr) #0 ; GFX950-NEXT: v_accvgpr_write_b32 a16, v61 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a17, v62 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a18, v63 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; def v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index 3194581fa4213..2d7cfcea04124 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -23,15 +23,15 @@ define void @parent_func_missing_inputs() #0 { ; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; FIXEDABI-NEXT: s_mov_b64 exec, s[18:19] ; FIXEDABI-NEXT: v_writelane_b32 v40, s16, 2 -; FIXEDABI-NEXT: 
s_addk_i32 s32, 0x400 ; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0 +; FIXEDABI-NEXT: s_addk_i32 s32, 0x400 +; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 ; FIXEDABI-NEXT: s_getpc_b64 s[16:17] ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 -; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 ; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 ; FIXEDABI-NEXT: s_mov_b32 s32, s33 ; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 2 ; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index c1617574becc3..23cae4b6a6baa 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -26,6 +26,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -49,230 +369,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, 
$agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = 
V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = 
V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 
= V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: 
$vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; 
GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, 
implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, 
addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, 
$exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 
5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into 
%stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into 
%stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into 
%stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, 
$exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 
implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -520,6 +1208,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -544,230 +1552,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 
+ ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: 
$vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 
killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, 
implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 
64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, 
$exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -1044,6 +2420,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1069,230 +2765,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed 
$agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed 
$agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 
killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = 
V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) 
into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 
64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit 
$exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit 
$exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit 
$exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit 
$exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: 
(store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, 
$exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -1541,6 +3605,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ 
$}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 
+ ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -1567,230 +3951,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, 
$vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: 
$vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = 
V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = 
V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = 
V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = 
V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, 
$exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) 
into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 
64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, 
$exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, 
$sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -2067,6 +4819,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; 
GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -2094,230 +5166,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; 
GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 
; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 
64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, 
$exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 
64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, 
implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec 
:: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit 
$exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit 
$exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit 
$exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec 
:: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 
0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -2567,6 +6007,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -2595,230 +6355,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed 
$agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 
600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, 
$sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, 
$sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 
64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, 
$exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 
32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr 
:: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 
4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into 
%stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -3095,6 +7223,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -3118,230 +7566,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr0, $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, 
$vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed 
$agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, 
implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed 
$agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 
killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = 
V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, 
$exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit 
$exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit 
$exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit 
$exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit 
$exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: 
(store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, 
$exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -3589,6 +8405,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -3613,230 +8749,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr0, $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, 
$agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 
= V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 
killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, 
implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, 
$sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, 
$sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 
64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, 
$exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, 
$exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -4112,6 +9616,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -4137,230 +9961,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec 
+ ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, 
implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, 
$exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit 
$exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit 
$exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit 
$exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit 
$exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: 
(store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, 
$exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -4609,6 +10801,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + 
; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 
+ ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -4635,230 +11147,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 
+ ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask 
$agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 
32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; 
GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; 
GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 
5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 
64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, 
$exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 
0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -5133,6 +12013,326 @@ body: | ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr106 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr137 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; 
GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr231 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -5160,230 +12360,598 @@ body: | ; GFX90A-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, 
$agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-NEXT: $vgpr1 = 
V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-NEXT: $vgpr12 = 
V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-NEXT: $vgpr23 
= V_ACCVGPR_READ_B32_e64 killed $agpr55, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-NEXT: 
$vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; 
GFX90A-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, 
implicit $exec :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET 
killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr108, 32, $exec, 64, 37632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, 
addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr124, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, 
$exec, 64, 31744 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 
5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr147, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr154, 32, $exec, 64, 25856 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into 
%stack.132, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr170, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 64, 20992 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr177, 32, $exec, 64, 19968 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into 
%stack.155, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, $exec, 64, 17152 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr193, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr200, 32, $exec, 64, 14080 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr203, 32, $exec, 64, 13312 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into 
%stack.178, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr216, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, 
$exec, 64, 8192 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5) + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, 
implicit $exec :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr248, 32, $exec, 64, 1792 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec @@ -5633,6 +13201,326 @@ body: | ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr69 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr95 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr97 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr128 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr152 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr180 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr193 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; 
GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr226 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr235 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX908-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr45 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX908-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -5661,230 +13549,598 @@ body: | ; GFX90A-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX90A-FLATSCR-NEXT: liveins: $agpr32, $agpr33, $agpr34, $agpr35, $agpr36, $agpr37, $agpr38, $agpr39, $agpr40, $agpr41, $agpr42, $agpr43, $agpr44, $agpr45, $agpr46, $agpr47, $agpr48, $agpr49, $agpr50, $agpr51, $agpr52, $agpr53, $agpr54, $agpr55, $agpr56, $agpr57, $agpr58, $agpr59, $agpr60, $agpr61, $agpr62, $agpr63, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-FLATSCR-NEXT: {{ $}} + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 
+ ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr32, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr32, $vgpr0, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr33, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr33, $vgpr1, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr34, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr34, $vgpr2, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: 
$vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr35, $vgpr3, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr36, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr36, $vgpr4, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr37, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr37, $vgpr5, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr38, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr38, $vgpr6, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr39, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr39, $vgpr7, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr40, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr40, $vgpr8, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr41, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr41, $vgpr9, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr42, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr42, $vgpr10, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr43, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr43, $vgpr11, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr44, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr44, $vgpr12, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 
killed $agpr45, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr45, $vgpr13, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr46, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr46, $vgpr14, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr47, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr47, $vgpr15, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr16 = V_ACCVGPR_READ_B32_e64 killed $agpr48, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr48, $vgpr16, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr17 = V_ACCVGPR_READ_B32_e64 killed $agpr49, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr49, $vgpr17, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr18 = V_ACCVGPR_READ_B32_e64 killed $agpr50, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr50, $vgpr18, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr19 = V_ACCVGPR_READ_B32_e64 killed $agpr51, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr51, $vgpr19, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr20 = V_ACCVGPR_READ_B32_e64 killed $agpr52, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr52, $vgpr20, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr21 = V_ACCVGPR_READ_B32_e64 killed $agpr53, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr53, $vgpr21, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr22 = V_ACCVGPR_READ_B32_e64 killed $agpr54, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr54, $vgpr22, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr23 = V_ACCVGPR_READ_B32_e64 killed $agpr55, 
implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr55, $vgpr23, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr24 = V_ACCVGPR_READ_B32_e64 killed $agpr56, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr56, $vgpr24, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr25 = V_ACCVGPR_READ_B32_e64 killed $agpr57, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr57, $vgpr25, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr26 = V_ACCVGPR_READ_B32_e64 killed $agpr58, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr58, $vgpr26, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr27 = V_ACCVGPR_READ_B32_e64 killed $agpr59, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr59, $vgpr27, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr60, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr60, $vgpr28, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr61, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr61, $vgpr29, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr62, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr62, $vgpr30, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr63, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr63, $vgpr31, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr64, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr64, $vgpr32, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 killed $agpr65, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr65, $vgpr33, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr34 = V_ACCVGPR_READ_B32_e64 killed $agpr66, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr66, $vgpr34, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr35 = V_ACCVGPR_READ_B32_e64 killed $agpr67, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr67, $vgpr35, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr36 = V_ACCVGPR_READ_B32_e64 killed $agpr68, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr68, $vgpr36, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr37 = V_ACCVGPR_READ_B32_e64 killed $agpr69, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr69, $vgpr37, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr38 = V_ACCVGPR_READ_B32_e64 killed $agpr70, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr70, $vgpr38, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr39 = V_ACCVGPR_READ_B32_e64 killed $agpr71, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr71, $vgpr39, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr48 = V_ACCVGPR_READ_B32_e64 killed $agpr72, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr72, $vgpr48, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr49 = V_ACCVGPR_READ_B32_e64 killed $agpr73, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr73, $vgpr49, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr50 = V_ACCVGPR_READ_B32_e64 killed $agpr74, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr74, $vgpr50, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr51 = V_ACCVGPR_READ_B32_e64 killed $agpr75, implicit $exec + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr75, $vgpr51, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr52 = V_ACCVGPR_READ_B32_e64 killed $agpr76, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr76, $vgpr52, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr77, $vgpr53, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr78, $vgpr54, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $agpr79, $vgpr55, 32, $exec, 64 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr80, 32, $exec, 64, 44800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr81, 32, $exec, 64, 44544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr82, 32, $exec, 64, 44288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr83, 32, $exec, 64, 44032 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr84, 32, $exec, 64, 43776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr85, 32, $exec, 64, 43520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr86, 32, $exec, 64, 43264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr87, 32, $exec, 64, 43008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr88, 32, $exec, 64, 42752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr89, 32, $exec, 64, 42496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr90, 32, $exec, 64, 42240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr91, 32, $exec, 64, 41984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr92, 32, $exec, 64, 41728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr93, 32, $exec, 64, 41472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr94, 32, $exec, 64, 41216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr95, 32, $exec, 64, 40960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr96, 32, $exec, 64, 40704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr97, 32, $exec, 64, 40448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr98, 32, $exec, 64, 40192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr99, 32, $exec, 64, 39936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr100, 32, $exec, 64, 39680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr101, 32, $exec, 64, 39424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr102, 32, $exec, 64, 39168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr103, 32, $exec, 64, 38912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr104, 32, $exec, 64, 38656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr105, 32, $exec, 64, 38400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr106, 32, $exec, 64, 38144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr107, 32, $exec, 64, 37888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr108, 32, $exec, 64, 37632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr109, 32, $exec, 64, 37376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr110, 32, $exec, 64, 37120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr111, 32, $exec, 64, 36864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr112, 32, $exec, 64, 36608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr113, 32, $exec, 64, 36352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr114, 32, $exec, 64, 36096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr115, 32, $exec, 64, 35840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr116, 32, $exec, 64, 35584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr117, 32, $exec, 64, 35328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr118, 32, $exec, 64, 35072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr119, 32, $exec, 64, 34816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr120, 32, $exec, 64, 34560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5) + ; GFX90A-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr121, 32, $exec, 64, 34304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr122, 32, $exec, 64, 34048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr123, 32, $exec, 64, 33792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr124, 32, $exec, 64, 33536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr125, 32, $exec, 64, 33280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr126, 32, $exec, 64, 33024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr127, 32, $exec, 64, 32768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr128, 32, $exec, 64, 32512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr129, 32, $exec, 64, 32256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr130, 32, $exec, 64, 32000 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr131, 32, $exec, 64, 31744 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr132, 32, $exec, 64, 31488 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr133, 32, $exec, 64, 31232 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr134, 32, $exec, 64, 30976 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr135, 32, $exec, 64, 30720 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5) + ; 
GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr136, 32, $exec, 64, 30464 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr137, 32, $exec, 64, 30208 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr138, 32, $exec, 64, 29952 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr139, 32, $exec, 64, 29696 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr140, 32, $exec, 64, 29440 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr141, 32, $exec, 64, 29184 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr142, 32, $exec, 64, 28928 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr143, 32, $exec, 64, 28672 ; GFX90A-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr144, 32, $exec, 64, 28416 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr145, 32, $exec, 64, 28160 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr146, 32, $exec, 64, 27904 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr147, 32, $exec, 64, 27648 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr148, 32, $exec, 64, 27392 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr149, 32, $exec, 64, 27136 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr150, 32, $exec, 64, 26880 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, 
addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr151, 32, $exec, 64, 26624 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr152, 32, $exec, 64, 26368 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr153, 32, $exec, 64, 26112 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr154, 32, $exec, 64, 25856 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr155, 32, $exec, 64, 25600 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr156, 32, $exec, 64, 25344 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr157, 32, $exec, 64, 25088 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr158, 32, $exec, 64, 24832 ; 
GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr159, 32, $exec, 64, 24576 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr160, 32, $exec, 64, 24320 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr161, 32, $exec, 64, 24064 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr162, 32, $exec, 64, 23808 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr163, 32, $exec, 64, 23552 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr164, 32, $exec, 64, 23296 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr165, 32, $exec, 64, 23040 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.136, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr166, 32, $exec, 64, 22784 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr167, 32, $exec, 64, 22528 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr168, 32, $exec, 64, 22272 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr169, 32, $exec, 64, 22016 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr170, 32, $exec, 64, 21760 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr171, 32, $exec, 64, 21504 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr172, 32, $exec, 64, 21248 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr173, 32, $exec, 
64, 20992 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr174, 32, $exec, 64, 20736 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr175, 32, $exec, 64, 20480 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr176, 32, $exec, 64, 20224 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr177, 32, $exec, 64, 19968 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr178, 32, $exec, 64, 19712 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr179, 32, $exec, 64, 19456 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr180, 32, $exec, 64, 19200 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.151, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr181, 32, $exec, 64, 18944 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr182, 32, $exec, 64, 18688 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr183, 32, $exec, 64, 18432 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr184, 32, $exec, 64, 18176 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr185, 32, $exec, 64, 17920 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr186, 32, $exec, 64, 17664 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr187, 32, $exec, 64, 17408 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr188, 32, 
$exec, 64, 17152 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr189, 32, $exec, 64, 16896 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr190, 32, $exec, 64, 16640 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr191, 32, $exec, 64, 16384 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr192, 32, $exec, 64, 16128 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr193, 32, $exec, 64, 15872 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr194, 32, $exec, 64, 15616 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr195, 32, $exec, 64, 15360 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.166, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr196, 32, $exec, 64, 15104 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr197, 32, $exec, 64, 14848 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr198, 32, $exec, 64, 14592 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr199, 32, $exec, 64, 14336 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr200, 32, $exec, 64, 14080 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr201, 32, $exec, 64, 13824 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr202, 32, $exec, 64, 13568 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr203, 32, $exec, 64, 13312 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr204, 32, $exec, 64, 13056 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr205, 32, $exec, 64, 12800 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr206, 32, $exec, 64, 12544 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr207, 32, $exec, 64, 12288 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr208, 32, $exec, 64, 12032 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr209, 32, $exec, 64, 11776 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr210, 32, $exec, 64, 11520 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, 
implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr211, 32, $exec, 64, 11264 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr212, 32, $exec, 64, 11008 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr213, 32, $exec, 64, 10752 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr214, 32, $exec, 64, 10496 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr215, 32, $exec, 64, 10240 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr216, 32, $exec, 64, 9984 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr217, 32, $exec, 64, 9728 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr218, 32, $exec, 64, 9472 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr219, 32, $exec, 64, 9216 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr220, 32, $exec, 64, 8960 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr221, 32, $exec, 64, 8704 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr222, 32, $exec, 64, 8448 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr223, 32, $exec, 64, 8192 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr224, 32, $exec, 64, 7936 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr225, 32, $exec, 64, 7680 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr226, 32, $exec, 64, 7424 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr227, 32, $exec, 64, 7168 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr228, 32, $exec, 64, 6912 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr229, 32, $exec, 64, 6656 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr230, 32, $exec, 64, 6400 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr231, 32, $exec, 64, 6144 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr232, 32, $exec, 64, 5888 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_offset $agpr233, 32, $exec, 64, 5632 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr234, 32, $exec, 64, 5376 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr235, 32, $exec, 64, 5120 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr236, 32, $exec, 64, 4864 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr237, 32, $exec, 64, 4608 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr238, 32, $exec, 64, 4352 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr239, 32, $exec, 64, 4096 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr240, 32, $exec, 64, 3840 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr241, 32, $exec, 64, 3584 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr242, 32, $exec, 64, 3328 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr243, 32, $exec, 64, 3072 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr244, 32, $exec, 64, 2816 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr245, 32, $exec, 64, 2560 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr246, 32, $exec, 64, 2304 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr247, 32, $exec, 64, 2048 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset 
$agpr248, 32, $exec, 64, 1792 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr249, 32, $exec, 64, 1536 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr250, 32, $exec, 64, 1280 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr251, 32, $exec, 64, 1024 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr252, 32, $exec, 64, 768 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr253, 32, $exec, 64, 512 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr254, 32, $exec, 64, 256 ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5) + ; GFX90A-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $agpr255, 32, $exec, 64, 0 ; GFX90A-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX90A-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed 
$vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5) ; GFX90A-FLATSCR-NEXT: $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index ebbeab94066d6..a21db73cf3714 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -451,6 +451,7 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX90A-LABEL: v32_asm_def_use: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_accvgpr_read_b32 v35, a32 ; Reload Reuse ; GFX90A-NEXT: v_mov_b32_e32 v34, v0 ; GFX90A-NEXT: v_mov_b32_e32 v33, v1 ; GFX90A-NEXT: ;;#ASMSTART @@ -478,8 +479,8 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; copy ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_accvgpr_read_b32 v35, a32 ; Reload Reuse ; GFX90A-NEXT: v_accvgpr_mov_b32 a32, a1 +; GFX90A-NEXT: s_nop 0 ; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; copy @@ -1056,6 +1057,7 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX90A-LABEL: no_free_vgprs_at_sgpr_to_agpr_copy: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_accvgpr_read_b32 v34, a32 ; Reload Reuse ; GFX90A-NEXT: v_mov_b32_e32 v33, v0 ; GFX90A-NEXT: v_mov_b32_e32 v32, v1 ; GFX90A-NEXT: ;;#ASMSTART @@ -1077,8 +1079,7 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX90A-NEXT: v_accvgpr_write_b32 a18, s2 ; GFX90A-NEXT: v_accvgpr_write_b32 a17, s1 ; GFX90A-NEXT: v_accvgpr_write_b32 a16, s0 -; GFX90A-NEXT: v_accvgpr_read_b32 v34, a32 ; Reload Reuse -; GFX90A-NEXT: s_nop 0 +; GFX90A-NEXT: s_nop 1 ; GFX90A-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v32, a[16:31] ; GFX90A-NEXT: 
s_nop 10 ; GFX90A-NEXT: buffer_store_dword a0, off, s[0:3], s32 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir index 1573903945a3e..7f26e413cf780 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-reuse-writes.mir @@ -11,6 +11,16 @@ body: | ; GFX908-LABEL: name: standard ; GFX908: liveins: $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec @@ -42,6 +52,14 @@ body: | ; GFX908-LABEL: name: src_is_spill ; GFX908: liveins: $vgpr0, $vgpr1 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 
+ ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX908-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -66,6 +84,12 @@ body: | ; GFX908-LABEL: name: overlapping_agpr ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa , 0, 6 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr1_agpr2_agpr3_agpr4 ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir index 47d489b7f35ca..6e5f8aceaf169 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir @@ -18,6 +18,54 @@ body: | ; GFX942-LABEL: name: agpr_spill_copy ; GFX942: liveins: $agpr30, $agpr31 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; 
GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; 
GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GFX942-NEXT: renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF ; GFX942-NEXT: renamable $agpr28_agpr29 = IMPLICIT_DEF ; GFX942-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll index 356bf4b3cac28..5943fdc10c14d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll @@ -19,24 +19,25 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 ; DAGISEL-NEXT: v_writelane_b32 v42, s0, 2 +; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 +; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 +; DAGISEL-NEXT: v_writelane_b32 v42, s31, 1 ; DAGISEL-NEXT: v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1 ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 ; DAGISEL-NEXT: s_mov_b32 s1, 
good_callee@abs32@hi ; DAGISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo -; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 -; DAGISEL-NEXT: v_writelane_b32 v42, s31, 1 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL-NEXT: global_store_b32 v[40:41], v0, off ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_load_b32 v41, off, s33 ; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1 ; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0 +; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1 ; DAGISEL-NEXT: s_mov_b32 s32, s33 ; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2 ; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -62,24 +63,25 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_mov_b32 exec_lo, s1 ; GISEL-NEXT: v_writelane_b32 v42, s0, 2 +; GISEL-NEXT: s_add_co_i32 s32, s32, 16 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v41, s33 +; GISEL-NEXT: v_writelane_b32 v42, s30, 0 +; GISEL-NEXT: v_writelane_b32 v42, s31, 1 ; GISEL-NEXT: v_dual_mov_b32 v40, v1 :: v_dual_mov_b32 v41, v2 ; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; GISEL-NEXT: v_writelane_b32 v42, s30, 0 ; GISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo ; GISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi -; GISEL-NEXT: s_add_co_i32 s32, s32, 16 -; GISEL-NEXT: v_writelane_b32 v42, s31, 1 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL-NEXT: global_store_b32 v[40:41], v0, off ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_load_b32 v41, off, s33 ; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GISEL-NEXT: v_readlane_b32 s31, v42, 1 ; GISEL-NEXT: v_readlane_b32 s30, v42, 0 +; GISEL-NEXT: v_readlane_b32 s31, v42, 1 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s0, v42, 2 ; GISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -138,152 
+140,291 @@ define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: ; 
meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: ; 
meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, 
s32 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 @@ -453,152 +594,291 @@ define amdgpu_gfx_whole_wave i32 
@tail_call_from_whole_wave(i1 %active, i32 %x, ; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v51, s32 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, 
s32 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 @@ -779,16 +1059,15 @@ define amdgpu_gfx void @ret_void(i32 %x) { ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 2 -; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo -; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 ; DAGISEL-NEXT: v_writelane_b32 v40, s30, 0 +; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 ; DAGISEL-NEXT: v_writelane_b32 v40, s31, 1 +; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-NEXT: 
v_readlane_b32 s31, v40, 1 ; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0 +; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1 ; DAGISEL-NEXT: s_mov_b32 s32, s33 ; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2 ; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 @@ -814,16 +1093,15 @@ define amdgpu_gfx void @ret_void(i32 %x) { ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_mov_b32 exec_lo, s1 ; GISEL-NEXT: v_writelane_b32 v40, s0, 2 -; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi -; GISEL-NEXT: s_add_co_i32 s32, s32, 16 ; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: s_add_co_i32 s32, s32, 16 ; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s0, v40, 2 ; GISEL-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index 74552a500ac51..7aa648f674f35 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -6757,24 +6757,43 @@ define <128 x i8> @bitcast_v32i32_to_v128i8(<32 x i32> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -7466,42 +7485,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: 
v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: 
v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s98, 34 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -7521,7 +7541,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB13_4 @@ -8391,6 +8410,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v22, 11 ; SI-NEXT: v_readlane_b32 s17, v22, 17 ; SI-NEXT: v_readlane_b32 s15, v22, 23 @@ -8398,42 +8418,41 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readlane_b32 s11, v22, 35 
; SI-NEXT: v_readlane_b32 s9, v22, 41 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 
23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -8630,38 +8649,39 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 
v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 
v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -8681,7 +8701,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -9470,43 +9489,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: v_readlane_b32 s15, v21, 1 ; VI-NEXT: v_readlane_b32 s13, v21, 3 ; VI-NEXT: v_readlane_b32 s11, v21, 5 ; VI-NEXT: v_readlane_b32 s9, v21, 7 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 
-; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -9679,42 +9698,43 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, 
s[4:5] -; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_writelane_b32 v20, s31, 1 -; GFX9-NEXT: v_writelane_b32 v20, s34, 2 -; GFX9-NEXT: v_writelane_b32 v20, s35, 3 -; GFX9-NEXT: v_writelane_b32 v20, s36, 4 -; GFX9-NEXT: v_writelane_b32 v20, s37, 5 -; GFX9-NEXT: v_writelane_b32 v20, s38, 6 -; GFX9-NEXT: v_writelane_b32 v20, s39, 7 -; GFX9-NEXT: v_writelane_b32 v20, s48, 8 -; GFX9-NEXT: v_writelane_b32 v20, s49, 9 -; GFX9-NEXT: v_writelane_b32 v20, s50, 10 -; GFX9-NEXT: v_writelane_b32 v20, s51, 11 -; GFX9-NEXT: v_writelane_b32 v20, s52, 12 -; GFX9-NEXT: v_writelane_b32 v20, s53, 13 -; GFX9-NEXT: v_writelane_b32 v20, s54, 14 -; GFX9-NEXT: v_writelane_b32 v20, s55, 15 -; GFX9-NEXT: v_writelane_b32 v20, s64, 16 -; GFX9-NEXT: v_writelane_b32 v20, s65, 17 -; GFX9-NEXT: v_writelane_b32 v20, s66, 18 -; GFX9-NEXT: v_writelane_b32 v20, s67, 19 -; GFX9-NEXT: v_writelane_b32 v20, s68, 20 -; GFX9-NEXT: v_writelane_b32 v20, s69, 21 -; GFX9-NEXT: v_writelane_b32 v20, s70, 22 -; GFX9-NEXT: v_writelane_b32 v20, s71, 23 -; GFX9-NEXT: v_writelane_b32 v20, s80, 24 -; GFX9-NEXT: v_writelane_b32 v20, s81, 25 -; GFX9-NEXT: v_writelane_b32 v20, s82, 26 -; GFX9-NEXT: v_writelane_b32 v20, s83, 27 -; GFX9-NEXT: v_writelane_b32 v20, s84, 28 -; GFX9-NEXT: v_writelane_b32 v20, s85, 29 -; GFX9-NEXT: v_writelane_b32 v20, s86, 30 -; GFX9-NEXT: v_writelane_b32 v20, s87, 31 -; GFX9-NEXT: v_writelane_b32 v20, s96, 32 -; GFX9-NEXT: v_writelane_b32 v20, s97, 33 +; GFX9-NEXT: v_writelane_b32 v20, s34, 0 +; GFX9-NEXT: v_writelane_b32 v20, s35, 1 +; GFX9-NEXT: v_writelane_b32 v20, s36, 2 +; GFX9-NEXT: v_writelane_b32 v20, s37, 3 +; GFX9-NEXT: v_writelane_b32 v20, s38, 4 +; GFX9-NEXT: v_writelane_b32 v20, s39, 5 +; GFX9-NEXT: v_writelane_b32 v20, s48, 6 +; GFX9-NEXT: v_writelane_b32 v20, s49, 7 +; GFX9-NEXT: v_writelane_b32 v20, s50, 8 +; GFX9-NEXT: v_writelane_b32 v20, s51, 9 +; GFX9-NEXT: v_writelane_b32 v20, s52, 10 +; GFX9-NEXT: v_writelane_b32 v20, s53, 11 +; GFX9-NEXT: v_writelane_b32 v20, 
s54, 12 +; GFX9-NEXT: v_writelane_b32 v20, s55, 13 +; GFX9-NEXT: v_writelane_b32 v20, s64, 14 +; GFX9-NEXT: v_writelane_b32 v20, s65, 15 +; GFX9-NEXT: v_writelane_b32 v20, s66, 16 +; GFX9-NEXT: v_writelane_b32 v20, s67, 17 +; GFX9-NEXT: v_writelane_b32 v20, s68, 18 +; GFX9-NEXT: v_writelane_b32 v20, s69, 19 +; GFX9-NEXT: v_writelane_b32 v20, s70, 20 +; GFX9-NEXT: v_writelane_b32 v20, s71, 21 +; GFX9-NEXT: v_writelane_b32 v20, s80, 22 +; GFX9-NEXT: v_writelane_b32 v20, s81, 23 +; GFX9-NEXT: v_writelane_b32 v20, s82, 24 +; GFX9-NEXT: v_writelane_b32 v20, s83, 25 +; GFX9-NEXT: v_writelane_b32 v20, s84, 26 +; GFX9-NEXT: v_writelane_b32 v20, s85, 27 +; GFX9-NEXT: v_writelane_b32 v20, s86, 28 +; GFX9-NEXT: v_writelane_b32 v20, s87, 29 +; GFX9-NEXT: v_writelane_b32 v20, s96, 30 +; GFX9-NEXT: v_writelane_b32 v20, s97, 31 +; GFX9-NEXT: v_writelane_b32 v20, s98, 32 +; GFX9-NEXT: v_writelane_b32 v20, s99, 33 +; GFX9-NEXT: v_writelane_b32 v20, s30, 34 +; GFX9-NEXT: v_writelane_b32 v20, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v20, s98, 34 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -9734,7 +9754,6 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: v_writelane_b32 v20, s99, 35 ; GFX9-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB13_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -10468,44 +10487,44 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:120 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v20, 34 ; GFX9-NEXT: v_readlane_b32 s9, v21, 1 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:124 
-; GFX9-NEXT: v_readlane_b32 s99, v20, 35 -; GFX9-NEXT: v_readlane_b32 s98, v20, 34 -; GFX9-NEXT: v_readlane_b32 s97, v20, 33 -; GFX9-NEXT: v_readlane_b32 s96, v20, 32 -; GFX9-NEXT: v_readlane_b32 s87, v20, 31 -; GFX9-NEXT: v_readlane_b32 s86, v20, 30 -; GFX9-NEXT: v_readlane_b32 s85, v20, 29 -; GFX9-NEXT: v_readlane_b32 s84, v20, 28 -; GFX9-NEXT: v_readlane_b32 s83, v20, 27 -; GFX9-NEXT: v_readlane_b32 s82, v20, 26 -; GFX9-NEXT: v_readlane_b32 s81, v20, 25 -; GFX9-NEXT: v_readlane_b32 s80, v20, 24 -; GFX9-NEXT: v_readlane_b32 s71, v20, 23 -; GFX9-NEXT: v_readlane_b32 s70, v20, 22 -; GFX9-NEXT: v_readlane_b32 s69, v20, 21 -; GFX9-NEXT: v_readlane_b32 s68, v20, 20 -; GFX9-NEXT: v_readlane_b32 s67, v20, 19 -; GFX9-NEXT: v_readlane_b32 s66, v20, 18 -; GFX9-NEXT: v_readlane_b32 s65, v20, 17 -; GFX9-NEXT: v_readlane_b32 s64, v20, 16 -; GFX9-NEXT: v_readlane_b32 s55, v20, 15 -; GFX9-NEXT: v_readlane_b32 s54, v20, 14 -; GFX9-NEXT: v_readlane_b32 s53, v20, 13 -; GFX9-NEXT: v_readlane_b32 s52, v20, 12 -; GFX9-NEXT: v_readlane_b32 s51, v20, 11 -; GFX9-NEXT: v_readlane_b32 s50, v20, 10 -; GFX9-NEXT: v_readlane_b32 s49, v20, 9 -; GFX9-NEXT: v_readlane_b32 s48, v20, 8 -; GFX9-NEXT: v_readlane_b32 s39, v20, 7 -; GFX9-NEXT: v_readlane_b32 s38, v20, 6 -; GFX9-NEXT: v_readlane_b32 s37, v20, 5 -; GFX9-NEXT: v_readlane_b32 s36, v20, 4 -; GFX9-NEXT: v_readlane_b32 s35, v20, 3 -; GFX9-NEXT: v_readlane_b32 s34, v20, 2 -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 -; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 35 +; GFX9-NEXT: v_readlane_b32 s99, v20, 33 +; GFX9-NEXT: v_readlane_b32 s98, v20, 32 +; GFX9-NEXT: v_readlane_b32 s97, v20, 31 +; GFX9-NEXT: v_readlane_b32 s96, v20, 30 +; GFX9-NEXT: v_readlane_b32 s87, v20, 29 +; GFX9-NEXT: v_readlane_b32 s86, v20, 28 +; GFX9-NEXT: v_readlane_b32 s85, v20, 27 +; GFX9-NEXT: v_readlane_b32 s84, v20, 26 +; GFX9-NEXT: v_readlane_b32 s83, v20, 25 +; GFX9-NEXT: v_readlane_b32 s82, v20, 24 +; GFX9-NEXT: v_readlane_b32 s81, 
v20, 23 +; GFX9-NEXT: v_readlane_b32 s80, v20, 22 +; GFX9-NEXT: v_readlane_b32 s71, v20, 21 +; GFX9-NEXT: v_readlane_b32 s70, v20, 20 +; GFX9-NEXT: v_readlane_b32 s69, v20, 19 +; GFX9-NEXT: v_readlane_b32 s68, v20, 18 +; GFX9-NEXT: v_readlane_b32 s67, v20, 17 +; GFX9-NEXT: v_readlane_b32 s66, v20, 16 +; GFX9-NEXT: v_readlane_b32 s65, v20, 15 +; GFX9-NEXT: v_readlane_b32 s64, v20, 14 +; GFX9-NEXT: v_readlane_b32 s55, v20, 13 +; GFX9-NEXT: v_readlane_b32 s54, v20, 12 +; GFX9-NEXT: v_readlane_b32 s53, v20, 11 +; GFX9-NEXT: v_readlane_b32 s52, v20, 10 +; GFX9-NEXT: v_readlane_b32 s51, v20, 9 +; GFX9-NEXT: v_readlane_b32 s50, v20, 8 +; GFX9-NEXT: v_readlane_b32 s49, v20, 7 +; GFX9-NEXT: v_readlane_b32 s48, v20, 6 +; GFX9-NEXT: v_readlane_b32 s39, v20, 5 +; GFX9-NEXT: v_readlane_b32 s38, v20, 4 +; GFX9-NEXT: v_readlane_b32 s37, v20, 3 +; GFX9-NEXT: v_readlane_b32 s36, v20, 2 +; GFX9-NEXT: v_readlane_b32 s35, v20, 1 +; GFX9-NEXT: v_readlane_b32 s34, v20, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -10668,70 +10687,73 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v16, s32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v17, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v18, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v16, s30, 0 -; GFX11-NEXT: v_writelane_b32 v17, s96, 0 +; GFX11-NEXT: v_writelane_b32 v16, s34, 0 +; GFX11-NEXT: v_writelane_b32 v16, s35, 1 +; GFX11-NEXT: v_writelane_b32 v16, s36, 2 +; GFX11-NEXT: v_writelane_b32 v16, s37, 3 +; GFX11-NEXT: v_writelane_b32 v16, s38, 4 +; 
GFX11-NEXT: v_writelane_b32 v16, s39, 5 +; GFX11-NEXT: v_writelane_b32 v16, s48, 6 +; GFX11-NEXT: v_writelane_b32 v16, s49, 7 +; GFX11-NEXT: v_writelane_b32 v16, s50, 8 +; GFX11-NEXT: v_writelane_b32 v16, s51, 9 +; GFX11-NEXT: v_writelane_b32 v16, s52, 10 +; GFX11-NEXT: v_writelane_b32 v16, s53, 11 +; GFX11-NEXT: v_writelane_b32 v16, s54, 12 +; GFX11-NEXT: v_writelane_b32 v16, s55, 13 +; GFX11-NEXT: v_writelane_b32 v16, s64, 14 +; GFX11-NEXT: v_writelane_b32 v16, s65, 15 +; GFX11-NEXT: v_writelane_b32 v16, s66, 16 +; GFX11-NEXT: v_writelane_b32 v16, s67, 17 +; GFX11-NEXT: v_writelane_b32 v16, s68, 18 +; GFX11-NEXT: v_writelane_b32 v16, s69, 19 +; GFX11-NEXT: v_writelane_b32 v16, s70, 20 +; GFX11-NEXT: v_writelane_b32 v16, s71, 21 +; GFX11-NEXT: v_writelane_b32 v16, s80, 22 +; GFX11-NEXT: v_writelane_b32 v16, s81, 23 +; GFX11-NEXT: v_writelane_b32 v16, s82, 24 +; GFX11-NEXT: v_writelane_b32 v16, s83, 25 +; GFX11-NEXT: v_writelane_b32 v16, s84, 26 +; GFX11-NEXT: v_writelane_b32 v16, s85, 27 +; GFX11-NEXT: v_writelane_b32 v16, s86, 28 +; GFX11-NEXT: v_writelane_b32 v16, s87, 29 +; GFX11-NEXT: v_writelane_b32 v16, s96, 30 +; GFX11-NEXT: v_writelane_b32 v16, s97, 31 +; GFX11-NEXT: v_writelane_b32 v17, s98, 0 +; GFX11-NEXT: v_writelane_b32 v17, s99, 1 +; GFX11-NEXT: v_writelane_b32 v17, s100, 2 +; GFX11-NEXT: v_writelane_b32 v17, s101, 3 +; GFX11-NEXT: v_writelane_b32 v17, s102, 4 +; GFX11-NEXT: v_writelane_b32 v17, s103, 5 +; GFX11-NEXT: v_writelane_b32 v17, s104, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-NEXT: v_readfirstlane_b32 s40, v1 ; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v16, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s97, 1 ; GFX11-NEXT: v_readfirstlane_b32 s14, v3 ; GFX11-NEXT: v_readfirstlane_b32 s15, v4 ; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v16, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s98, 2 
; GFX11-NEXT: v_readfirstlane_b32 s13, v6 ; GFX11-NEXT: v_readfirstlane_b32 s10, v7 ; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v16, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s99, 3 ; GFX11-NEXT: v_readfirstlane_b32 s8, v9 ; GFX11-NEXT: v_readfirstlane_b32 s9, v10 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v16, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s100, 4 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: v_readfirstlane_b32 s4, v13 ; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v16, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s101, 5 ; GFX11-NEXT: s_mov_b32 s101, 0 ; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr19 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v16, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s102, 6 -; GFX11-NEXT: v_writelane_b32 v16, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s103, 7 -; GFX11-NEXT: v_writelane_b32 v16, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s104, 8 -; GFX11-NEXT: v_writelane_b32 v16, s49, 9 -; GFX11-NEXT: v_writelane_b32 v16, s50, 10 -; GFX11-NEXT: v_writelane_b32 v16, s51, 11 -; GFX11-NEXT: v_writelane_b32 v16, s52, 12 -; GFX11-NEXT: v_writelane_b32 v16, s53, 13 -; GFX11-NEXT: v_writelane_b32 v16, s54, 14 -; GFX11-NEXT: v_writelane_b32 v16, s55, 15 -; GFX11-NEXT: v_writelane_b32 v16, s64, 16 -; GFX11-NEXT: v_writelane_b32 v16, s65, 17 -; GFX11-NEXT: v_writelane_b32 v16, s66, 18 -; GFX11-NEXT: v_writelane_b32 v16, s67, 19 -; GFX11-NEXT: v_writelane_b32 v16, s68, 20 -; GFX11-NEXT: v_writelane_b32 v16, s69, 21 -; GFX11-NEXT: v_writelane_b32 v16, s70, 22 -; GFX11-NEXT: v_writelane_b32 v16, s71, 23 -; GFX11-NEXT: v_writelane_b32 v16, s80, 24 -; GFX11-NEXT: v_writelane_b32 v16, s81, 25 -; GFX11-NEXT: v_writelane_b32 v16, s82, 26 -; GFX11-NEXT: v_writelane_b32 v16, s83, 27 -; GFX11-NEXT: v_writelane_b32 v16, s84, 28 -; GFX11-NEXT: 
v_writelane_b32 v16, s85, 29 -; GFX11-NEXT: v_writelane_b32 v16, s86, 30 -; GFX11-NEXT: v_writelane_b32 v16, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB13_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s25, 8 @@ -11258,9 +11280,9 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff ; GFX11-NEXT: v_readlane_b32 s17, v19, 29 ; GFX11-NEXT: s_and_b32 s16, s16, 0xff -; GFX11-NEXT: v_readlane_b32 s100, v17, 4 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s99, v17, 3 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 ; GFX11-NEXT: s_or_b32 s2, s2, s3 ; GFX11-NEXT: s_and_b32 s3, s18, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 16 @@ -11294,7 +11316,7 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s16, s16, s17 ; GFX11-NEXT: v_readlane_b32 s17, v19, 21 ; GFX11-NEXT: s_lshl_b32 s16, s16, 16 -; GFX11-NEXT: v_readlane_b32 s98, v17, 2 +; GFX11-NEXT: v_readlane_b32 s100, v17, 2 ; GFX11-NEXT: s_or_b32 s3, s3, s16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v7, s2 :: v_dual_mov_b32 v8, s3 @@ -11314,12 +11336,12 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s0, s0, s1 ; GFX11-NEXT: s_and_b32 s1, s21, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 8 -; GFX11-NEXT: v_readlane_b32 s86, v16, 30 +; GFX11-NEXT: v_readlane_b32 s99, v17, 1 ; GFX11-NEXT: s_or_b32 s1, s1, s2 ; GFX11-NEXT: v_readlane_b32 s2, v19, 25 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-NEXT: v_readlane_b32 s31, v16, 1 -; GFX11-NEXT: v_readlane_b32 s30, v16, 0 +; GFX11-NEXT: v_readlane_b32 s98, v17, 0 +; GFX11-NEXT: v_readlane_b32 s86, v16, 28 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s2, s3 @@ -11356,9 +11378,9 @@ define inreg 
<128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: s_lshl_b32 s3, s88, 8 ; GFX11-NEXT: s_and_b32 s16, s69, 0xff ; GFX11-NEXT: s_and_b32 s18, s72, 0xff -; GFX11-NEXT: v_readlane_b32 s97, v17, 1 +; GFX11-NEXT: v_readlane_b32 s97, v16, 31 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s69, v16, 21 +; GFX11-NEXT: v_readlane_b32 s69, v16, 19 ; GFX11-NEXT: s_or_b32 s1, s2, s3 ; GFX11-NEXT: v_readlane_b32 s3, v19, 16 ; GFX11-NEXT: s_and_b32 s2, s25, 0xff @@ -11565,39 +11587,39 @@ define inreg <128 x i8> @bitcast_v32i32_to_v128i8_scalar(<32 x i32> inreg %a, i3 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:112 -; GFX11-NEXT: v_readlane_b32 s104, v17, 8 -; GFX11-NEXT: v_readlane_b32 s103, v17, 7 -; GFX11-NEXT: v_readlane_b32 s102, v17, 6 -; GFX11-NEXT: v_readlane_b32 s101, v17, 5 -; GFX11-NEXT: v_readlane_b32 s96, v17, 0 -; GFX11-NEXT: v_readlane_b32 s87, v16, 31 -; GFX11-NEXT: v_readlane_b32 s85, v16, 29 -; GFX11-NEXT: v_readlane_b32 s84, v16, 28 -; GFX11-NEXT: v_readlane_b32 s83, v16, 27 -; GFX11-NEXT: v_readlane_b32 s82, v16, 26 -; GFX11-NEXT: v_readlane_b32 s81, v16, 25 -; GFX11-NEXT: v_readlane_b32 s80, v16, 24 -; GFX11-NEXT: v_readlane_b32 s71, v16, 23 -; GFX11-NEXT: v_readlane_b32 s70, v16, 22 -; GFX11-NEXT: v_readlane_b32 s68, v16, 20 -; GFX11-NEXT: v_readlane_b32 s67, v16, 19 -; GFX11-NEXT: v_readlane_b32 s66, v16, 18 -; GFX11-NEXT: v_readlane_b32 s65, v16, 17 -; GFX11-NEXT: v_readlane_b32 s64, v16, 16 -; GFX11-NEXT: v_readlane_b32 s55, v16, 15 -; GFX11-NEXT: v_readlane_b32 s54, v16, 14 -; GFX11-NEXT: v_readlane_b32 s53, v16, 13 -; GFX11-NEXT: v_readlane_b32 s52, v16, 12 -; GFX11-NEXT: v_readlane_b32 s51, v16, 11 -; GFX11-NEXT: v_readlane_b32 s50, v16, 10 -; GFX11-NEXT: v_readlane_b32 s49, v16, 9 -; GFX11-NEXT: v_readlane_b32 s48, v16, 8 -; GFX11-NEXT: v_readlane_b32 s39, v16, 7 -; 
GFX11-NEXT: v_readlane_b32 s38, v16, 6 -; GFX11-NEXT: v_readlane_b32 s37, v16, 5 -; GFX11-NEXT: v_readlane_b32 s36, v16, 4 -; GFX11-NEXT: v_readlane_b32 s35, v16, 3 -; GFX11-NEXT: v_readlane_b32 s34, v16, 2 +; GFX11-NEXT: v_readlane_b32 s104, v17, 6 +; GFX11-NEXT: v_readlane_b32 s103, v17, 5 +; GFX11-NEXT: v_readlane_b32 s102, v17, 4 +; GFX11-NEXT: v_readlane_b32 s101, v17, 3 +; GFX11-NEXT: v_readlane_b32 s96, v16, 30 +; GFX11-NEXT: v_readlane_b32 s87, v16, 29 +; GFX11-NEXT: v_readlane_b32 s85, v16, 27 +; GFX11-NEXT: v_readlane_b32 s84, v16, 26 +; GFX11-NEXT: v_readlane_b32 s83, v16, 25 +; GFX11-NEXT: v_readlane_b32 s82, v16, 24 +; GFX11-NEXT: v_readlane_b32 s81, v16, 23 +; GFX11-NEXT: v_readlane_b32 s80, v16, 22 +; GFX11-NEXT: v_readlane_b32 s71, v16, 21 +; GFX11-NEXT: v_readlane_b32 s70, v16, 20 +; GFX11-NEXT: v_readlane_b32 s68, v16, 18 +; GFX11-NEXT: v_readlane_b32 s67, v16, 17 +; GFX11-NEXT: v_readlane_b32 s66, v16, 16 +; GFX11-NEXT: v_readlane_b32 s65, v16, 15 +; GFX11-NEXT: v_readlane_b32 s64, v16, 14 +; GFX11-NEXT: v_readlane_b32 s55, v16, 13 +; GFX11-NEXT: v_readlane_b32 s54, v16, 12 +; GFX11-NEXT: v_readlane_b32 s53, v16, 11 +; GFX11-NEXT: v_readlane_b32 s52, v16, 10 +; GFX11-NEXT: v_readlane_b32 s51, v16, 9 +; GFX11-NEXT: v_readlane_b32 s50, v16, 8 +; GFX11-NEXT: v_readlane_b32 s49, v16, 7 +; GFX11-NEXT: v_readlane_b32 s48, v16, 6 +; GFX11-NEXT: v_readlane_b32 s39, v16, 5 +; GFX11-NEXT: v_readlane_b32 s38, v16, 4 +; GFX11-NEXT: v_readlane_b32 s37, v16, 3 +; GFX11-NEXT: v_readlane_b32 s36, v16, 2 +; GFX11-NEXT: v_readlane_b32 s35, v16, 1 +; GFX11-NEXT: v_readlane_b32 s34, v16, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v16, off, s32 @@ -16364,56 +16386,105 @@ define <32 x i32> @bitcast_v128i8_to_v32i32(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -20685,45 +20756,83 @@ define inreg <32 x i32> @bitcast_v128i8_to_v32i32_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -21626,45 +21735,83 @@ define inreg <32 x i32> 
@bitcast_v128i8_to_v32i32_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 
offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 
offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -23488,43 +23635,43 @@ define inreg <64 x bfloat> @bitcast_v32i32_to_v64bf16_scalar(<32 x i32> inreg %a ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; 
SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s70, v1 ; SI-NEXT: v_readfirstlane_b32 s71, v2 ; SI-NEXT: v_readfirstlane_b32 s80, v3 @@ -24044,43 +24191,43 @@ define inreg <64 x bfloat> @bitcast_v32i32_to_v64bf16_scalar(<32 x i32> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s4 ; SI-NEXT: v_alignbit_b32 v1, 
v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: 
v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -26131,20 +26278,35 @@ define <32 x i32> @bitcast_v64bf16_to_v32i32(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -29180,81 +29342,149 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v32i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, 
s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -30154,83 +30384,153 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32i32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -32001,12 +32301,26 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 
4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s30, 2 +; SI-NEXT: v_writelane_b32 v63, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s35, 3 ; SI-NEXT: v_readfirstlane_b32 s47, v1 ; SI-NEXT: v_readfirstlane_b32 s46, v2 ; SI-NEXT: v_readfirstlane_b32 s45, v3 @@ -32026,21 +32340,6 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], 
s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB21_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -32282,15 +32581,15 @@ define inreg <64 x half> @bitcast_v32i32_to_v64f16_scalar(<32 x i32> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s35, v63, 3 +; SI-NEXT: v_readlane_b32 s30, v63, 2 ; SI-NEXT: v_lshlrev_b32_e32 v61, 16, v61 ; SI-NEXT: v_or_b32_e32 v2, v2, v61 ; SI-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v2, v59 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v0 @@ -34731,84 +35030,155 @@ define inreg <32 x i32> @bitcast_v64f16_to_v32i32_scalar(<64 x half> inreg %a, i ; GFX11-LABEL: bitcast_v64f16_to_v32i32_scalar: ; GFX11: ; %bb.0: ; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 
offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, 
v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 
off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -35564,28 +35934,29 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 
v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s30, 20 +; SI-NEXT: v_writelane_b32 v20, s31, 21 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -35605,7 +35976,6 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s69, 21 ; SI-NEXT: s_cbranch_scc0 .LBB25_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 16 @@ -35924,29 +36294,29 @@ define inreg <64 x i16> @bitcast_v32i32_to_v64i16_scalar(<32 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 20 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 
20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 21 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -37781,84 +38151,155 @@ define inreg <32 x i32> @bitcast_v64i16_to_v32i32_scalar(<64 x i16> inreg %a, i3 ; GFX11-LABEL: bitcast_v64i16_to_v32i32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
-; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction 
; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -43668,24 +44109,43 @@ define <128 x i8> @bitcast_v32f32_to_v128i8(<32 x float> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 
offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -44359,44 +44819,56 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: 
v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -44416,19 +44888,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, 
; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB37_3 @@ -45563,42 +46022,42 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; SI-NEXT: v_and_b32_e32 v6, 0xff, v6 ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; 
SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: 
v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) expcnt(0) ; SI-NEXT: v_lshlrev_b32_e32 v18, 8, v22 ; SI-NEXT: v_and_b32_e32 v22, 0xff, v52 @@ -46098,39 +46557,53 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: 
v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: 
v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -46150,20 +46623,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB37_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -47018,38 +47477,38 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; VI-NEXT: v_lshlrev_b32_e32 v23, 8, v50 ; VI-NEXT: v_or_b32_sdwa v23, v24, v23 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: v_lshlrev_b32_e32 v24, 8, v36 -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: 
v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_sdwa v24, v25, v24 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v23, v23, v24 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -47369,43 +47828,57 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; 
GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; 
GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -47425,20 +47898,6 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB37_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -48291,42 +48750,42 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:16 ; 
GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v49 -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, 
v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v15, v35, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -48648,90 +49107,111 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 
0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s4, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s5, v6 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s10, v11 -; GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s11, v12 -; GFX11-NEXT: v_readfirstlane_b32 s12, v13 -; GFX11-NEXT: v_readfirstlane_b32 s13, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 vcc_hi, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 
+; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s4, v5 +; GFX11-NEXT: v_readfirstlane_b32 s5, v6 +; GFX11-NEXT: v_readfirstlane_b32 s6, v7 +; GFX11-NEXT: v_readfirstlane_b32 s7, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s10, v11 +; GFX11-NEXT: v_readfirstlane_b32 s11, v12 +; GFX11-NEXT: v_readfirstlane_b32 s12, v13 +; GFX11-NEXT: v_readfirstlane_b32 s13, v14 +; GFX11-NEXT: s_mov_b32 vcc_hi, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; 
GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB37_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s13, 24 @@ -49621,47 +50101,47 @@ define inreg <128 x i8> @bitcast_v32f32_to_v128i8_scalar(<32 x float> inreg %a, ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 -; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: 
v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; 
GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:76 @@ -54428,56 +54908,105 @@ define <32 x float> @bitcast_v128i8_to_v32f32(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 
+; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 
offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -58749,45 +59278,83 @@ define inreg <32 x float> @bitcast_v128i8_to_v32f32_scalar(<128 x i8> inreg %a, ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; 
GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -59690,45 +60257,83 @@ define inreg <32 x float> @bitcast_v128i8_to_v32f32_scalar(<128 x i8> inreg %a, ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -61535,44 +62140,57 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, 
s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, 
s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, 
s99, 35 ; SI-NEXT: v_readfirstlane_b32 s6, v1 ; SI-NEXT: v_readfirstlane_b32 s7, v2 ; SI-NEXT: v_readfirstlane_b32 s8, v3 @@ -61592,20 +62210,6 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: v_readfirstlane_b32 s46, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s47, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB41_3 ; SI-NEXT: ; %bb.1: ; %cmp.false @@ -62140,42 +62744,42 @@ define inreg <64 x bfloat> @bitcast_v32f32_to_v64bf16_scalar(<32 x float> inreg ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; 
SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: 
v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 @@ -64241,20 +64845,35 @@ define <32 x float> @bitcast_v64bf16_to_v32f32(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -67290,81 +67909,149 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v32f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -68264,83 +68951,153 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32f32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -70092,6 +70849,22 @@ define inreg <64 x half> @bitcast_v32f32_to_v64f16_scalar(<32 x float> inreg %a, ; SI-LABEL: bitcast_v32f32_to_v64f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: v_readfirstlane_b32 s47, v1 ; SI-NEXT: v_readfirstlane_b32 s46, v2 @@ -70112,22 +70885,6 @@ define inreg <64 x half> @bitcast_v32f32_to_v64f16_scalar(<32 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 
4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -72812,84 +73569,155 @@ define inreg <32 x float> @bitcast_v64f16_to_v32f32_scalar(<64 x half> inreg %a, ; GFX11-LABEL: bitcast_v64f16_to_v32f32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, 
s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -73625,22 +74453,6 @@ define inreg <64 x i16> @bitcast_v32f32_to_v64i16_scalar(<32 x float> inreg %a, ; SI-LABEL: bitcast_v32f32_to_v64i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_mov_b32_e32 v31, s16 -; SI-NEXT: v_mov_b32_e32 v32, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; SI-NEXT: v_mov_b32_e32 v27, s20 -; SI-NEXT: v_mov_b32_e32 v28, s21 -; SI-NEXT: v_mov_b32_e32 v25, s22 -; SI-NEXT: v_mov_b32_e32 v26, s23 -; SI-NEXT: v_mov_b32_e32 v23, s24 -; SI-NEXT: v_mov_b32_e32 v24, s25 -; SI-NEXT: v_mov_b32_e32 v21, s26 -; SI-NEXT: v_mov_b32_e32 v22, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v19, s28 -; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -73657,6 +74469,22 @@ define inreg <64 x i16> @bitcast_v32f32_to_v64i16_scalar(<32 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 +; SI-NEXT: v_mov_b32_e32 v31, s16 +; SI-NEXT: v_mov_b32_e32 v32, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v27, s20 +; SI-NEXT: v_mov_b32_e32 v28, s21 +; SI-NEXT: v_mov_b32_e32 v25, s22 +; SI-NEXT: v_mov_b32_e32 v26, s23 +; SI-NEXT: v_mov_b32_e32 v23, s24 +; SI-NEXT: v_mov_b32_e32 v24, s25 +; SI-NEXT: v_mov_b32_e32 v21, s26 +; SI-NEXT: v_mov_b32_e32 v22, s27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_mov_b32_e32 v19, s28 +; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[36:37], v[17:18], 16 @@ -75816,84 +76644,155 @@ define inreg <32 x float> @bitcast_v64i16_to_v32f32_scalar(<64 x i16> inreg %a, ; GFX11-LABEL: 
bitcast_v64i16_to_v32f32_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction 
; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; 
GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -80714,24 +81613,43 @@ define <128 x i8> @bitcast_v16i64_to_v128i8(<16 x i64> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, 
s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -81431,42 +82349,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; 
SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; 
SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s98, 34 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -81486,7 +82405,6 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB57_4 @@ -82356,6 +83274,7 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v22, 11 ; SI-NEXT: v_readlane_b32 s17, v22, 17 ; SI-NEXT: v_readlane_b32 s15, v22, 23 @@ -82363,42 +83282,41 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: v_readlane_b32 s11, v22, 35 ; SI-NEXT: v_readlane_b32 s9, v22, 39 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: 
v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: 
v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -82595,38 +83513,39 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, 
s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s45, v2 ; VI-NEXT: v_readfirstlane_b32 s42, v3 @@ -82646,7 +83565,6 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: 
v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB57_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -83435,43 +84353,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: v_readlane_b32 s15, v21, 1 ; VI-NEXT: v_readlane_b32 s13, v21, 3 ; VI-NEXT: v_readlane_b32 s11, v21, 5 ; VI-NEXT: v_readlane_b32 s9, v21, 7 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 -; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; 
VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -83644,42 +84562,43 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_writelane_b32 v20, s31, 1 -; GFX9-NEXT: v_writelane_b32 v20, s34, 2 -; GFX9-NEXT: v_writelane_b32 v20, s35, 3 -; GFX9-NEXT: v_writelane_b32 v20, s36, 4 -; GFX9-NEXT: 
v_writelane_b32 v20, s37, 5 -; GFX9-NEXT: v_writelane_b32 v20, s38, 6 -; GFX9-NEXT: v_writelane_b32 v20, s39, 7 -; GFX9-NEXT: v_writelane_b32 v20, s48, 8 -; GFX9-NEXT: v_writelane_b32 v20, s49, 9 -; GFX9-NEXT: v_writelane_b32 v20, s50, 10 -; GFX9-NEXT: v_writelane_b32 v20, s51, 11 -; GFX9-NEXT: v_writelane_b32 v20, s52, 12 -; GFX9-NEXT: v_writelane_b32 v20, s53, 13 -; GFX9-NEXT: v_writelane_b32 v20, s54, 14 -; GFX9-NEXT: v_writelane_b32 v20, s55, 15 -; GFX9-NEXT: v_writelane_b32 v20, s64, 16 -; GFX9-NEXT: v_writelane_b32 v20, s65, 17 -; GFX9-NEXT: v_writelane_b32 v20, s66, 18 -; GFX9-NEXT: v_writelane_b32 v20, s67, 19 -; GFX9-NEXT: v_writelane_b32 v20, s68, 20 -; GFX9-NEXT: v_writelane_b32 v20, s69, 21 -; GFX9-NEXT: v_writelane_b32 v20, s70, 22 -; GFX9-NEXT: v_writelane_b32 v20, s71, 23 -; GFX9-NEXT: v_writelane_b32 v20, s80, 24 -; GFX9-NEXT: v_writelane_b32 v20, s81, 25 -; GFX9-NEXT: v_writelane_b32 v20, s82, 26 -; GFX9-NEXT: v_writelane_b32 v20, s83, 27 -; GFX9-NEXT: v_writelane_b32 v20, s84, 28 -; GFX9-NEXT: v_writelane_b32 v20, s85, 29 -; GFX9-NEXT: v_writelane_b32 v20, s86, 30 -; GFX9-NEXT: v_writelane_b32 v20, s87, 31 -; GFX9-NEXT: v_writelane_b32 v20, s96, 32 -; GFX9-NEXT: v_writelane_b32 v20, s97, 33 +; GFX9-NEXT: v_writelane_b32 v20, s34, 0 +; GFX9-NEXT: v_writelane_b32 v20, s35, 1 +; GFX9-NEXT: v_writelane_b32 v20, s36, 2 +; GFX9-NEXT: v_writelane_b32 v20, s37, 3 +; GFX9-NEXT: v_writelane_b32 v20, s38, 4 +; GFX9-NEXT: v_writelane_b32 v20, s39, 5 +; GFX9-NEXT: v_writelane_b32 v20, s48, 6 +; GFX9-NEXT: v_writelane_b32 v20, s49, 7 +; GFX9-NEXT: v_writelane_b32 v20, s50, 8 +; GFX9-NEXT: v_writelane_b32 v20, s51, 9 +; GFX9-NEXT: v_writelane_b32 v20, s52, 10 +; GFX9-NEXT: v_writelane_b32 v20, s53, 11 +; GFX9-NEXT: v_writelane_b32 v20, s54, 12 +; GFX9-NEXT: v_writelane_b32 v20, s55, 13 +; GFX9-NEXT: v_writelane_b32 v20, s64, 14 +; GFX9-NEXT: v_writelane_b32 v20, s65, 15 +; GFX9-NEXT: v_writelane_b32 v20, s66, 16 +; GFX9-NEXT: v_writelane_b32 v20, s67, 17 +; 
GFX9-NEXT: v_writelane_b32 v20, s68, 18 +; GFX9-NEXT: v_writelane_b32 v20, s69, 19 +; GFX9-NEXT: v_writelane_b32 v20, s70, 20 +; GFX9-NEXT: v_writelane_b32 v20, s71, 21 +; GFX9-NEXT: v_writelane_b32 v20, s80, 22 +; GFX9-NEXT: v_writelane_b32 v20, s81, 23 +; GFX9-NEXT: v_writelane_b32 v20, s82, 24 +; GFX9-NEXT: v_writelane_b32 v20, s83, 25 +; GFX9-NEXT: v_writelane_b32 v20, s84, 26 +; GFX9-NEXT: v_writelane_b32 v20, s85, 27 +; GFX9-NEXT: v_writelane_b32 v20, s86, 28 +; GFX9-NEXT: v_writelane_b32 v20, s87, 29 +; GFX9-NEXT: v_writelane_b32 v20, s96, 30 +; GFX9-NEXT: v_writelane_b32 v20, s97, 31 +; GFX9-NEXT: v_writelane_b32 v20, s98, 32 +; GFX9-NEXT: v_writelane_b32 v20, s99, 33 +; GFX9-NEXT: v_writelane_b32 v20, s30, 34 +; GFX9-NEXT: v_writelane_b32 v20, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v20, s98, 34 ; GFX9-NEXT: v_readfirstlane_b32 s44, v1 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 ; GFX9-NEXT: v_readfirstlane_b32 s42, v3 @@ -83699,7 +84618,6 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: v_writelane_b32 v20, s99, 35 ; GFX9-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB57_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -84433,44 +85351,44 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:120 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v20, 34 ; GFX9-NEXT: v_readlane_b32 s9, v21, 1 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:124 -; GFX9-NEXT: v_readlane_b32 s99, v20, 35 -; GFX9-NEXT: v_readlane_b32 s98, v20, 34 -; GFX9-NEXT: v_readlane_b32 s97, v20, 33 -; GFX9-NEXT: v_readlane_b32 s96, v20, 32 -; GFX9-NEXT: v_readlane_b32 s87, v20, 31 -; GFX9-NEXT: 
v_readlane_b32 s86, v20, 30 -; GFX9-NEXT: v_readlane_b32 s85, v20, 29 -; GFX9-NEXT: v_readlane_b32 s84, v20, 28 -; GFX9-NEXT: v_readlane_b32 s83, v20, 27 -; GFX9-NEXT: v_readlane_b32 s82, v20, 26 -; GFX9-NEXT: v_readlane_b32 s81, v20, 25 -; GFX9-NEXT: v_readlane_b32 s80, v20, 24 -; GFX9-NEXT: v_readlane_b32 s71, v20, 23 -; GFX9-NEXT: v_readlane_b32 s70, v20, 22 -; GFX9-NEXT: v_readlane_b32 s69, v20, 21 -; GFX9-NEXT: v_readlane_b32 s68, v20, 20 -; GFX9-NEXT: v_readlane_b32 s67, v20, 19 -; GFX9-NEXT: v_readlane_b32 s66, v20, 18 -; GFX9-NEXT: v_readlane_b32 s65, v20, 17 -; GFX9-NEXT: v_readlane_b32 s64, v20, 16 -; GFX9-NEXT: v_readlane_b32 s55, v20, 15 -; GFX9-NEXT: v_readlane_b32 s54, v20, 14 -; GFX9-NEXT: v_readlane_b32 s53, v20, 13 -; GFX9-NEXT: v_readlane_b32 s52, v20, 12 -; GFX9-NEXT: v_readlane_b32 s51, v20, 11 -; GFX9-NEXT: v_readlane_b32 s50, v20, 10 -; GFX9-NEXT: v_readlane_b32 s49, v20, 9 -; GFX9-NEXT: v_readlane_b32 s48, v20, 8 -; GFX9-NEXT: v_readlane_b32 s39, v20, 7 -; GFX9-NEXT: v_readlane_b32 s38, v20, 6 -; GFX9-NEXT: v_readlane_b32 s37, v20, 5 -; GFX9-NEXT: v_readlane_b32 s36, v20, 4 -; GFX9-NEXT: v_readlane_b32 s35, v20, 3 -; GFX9-NEXT: v_readlane_b32 s34, v20, 2 -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 -; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 35 +; GFX9-NEXT: v_readlane_b32 s99, v20, 33 +; GFX9-NEXT: v_readlane_b32 s98, v20, 32 +; GFX9-NEXT: v_readlane_b32 s97, v20, 31 +; GFX9-NEXT: v_readlane_b32 s96, v20, 30 +; GFX9-NEXT: v_readlane_b32 s87, v20, 29 +; GFX9-NEXT: v_readlane_b32 s86, v20, 28 +; GFX9-NEXT: v_readlane_b32 s85, v20, 27 +; GFX9-NEXT: v_readlane_b32 s84, v20, 26 +; GFX9-NEXT: v_readlane_b32 s83, v20, 25 +; GFX9-NEXT: v_readlane_b32 s82, v20, 24 +; GFX9-NEXT: v_readlane_b32 s81, v20, 23 +; GFX9-NEXT: v_readlane_b32 s80, v20, 22 +; GFX9-NEXT: v_readlane_b32 s71, v20, 21 +; GFX9-NEXT: v_readlane_b32 s70, v20, 20 +; GFX9-NEXT: v_readlane_b32 s69, v20, 19 +; GFX9-NEXT: v_readlane_b32 s68, v20, 18 +; 
GFX9-NEXT: v_readlane_b32 s67, v20, 17 +; GFX9-NEXT: v_readlane_b32 s66, v20, 16 +; GFX9-NEXT: v_readlane_b32 s65, v20, 15 +; GFX9-NEXT: v_readlane_b32 s64, v20, 14 +; GFX9-NEXT: v_readlane_b32 s55, v20, 13 +; GFX9-NEXT: v_readlane_b32 s54, v20, 12 +; GFX9-NEXT: v_readlane_b32 s53, v20, 11 +; GFX9-NEXT: v_readlane_b32 s52, v20, 10 +; GFX9-NEXT: v_readlane_b32 s51, v20, 9 +; GFX9-NEXT: v_readlane_b32 s50, v20, 8 +; GFX9-NEXT: v_readlane_b32 s49, v20, 7 +; GFX9-NEXT: v_readlane_b32 s48, v20, 6 +; GFX9-NEXT: v_readlane_b32 s39, v20, 5 +; GFX9-NEXT: v_readlane_b32 s38, v20, 4 +; GFX9-NEXT: v_readlane_b32 s37, v20, 3 +; GFX9-NEXT: v_readlane_b32 s36, v20, 2 +; GFX9-NEXT: v_readlane_b32 s35, v20, 1 +; GFX9-NEXT: v_readlane_b32 s34, v20, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -84633,70 +85551,73 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v16, s32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v17, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v18, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v19, s32 offset:12 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v16, s30, 0 -; GFX11-NEXT: v_writelane_b32 v17, s96, 0 +; GFX11-NEXT: v_writelane_b32 v16, s34, 0 +; GFX11-NEXT: v_writelane_b32 v16, s35, 1 +; GFX11-NEXT: v_writelane_b32 v16, s36, 2 +; GFX11-NEXT: v_writelane_b32 v16, s37, 3 +; GFX11-NEXT: v_writelane_b32 v16, s38, 4 +; GFX11-NEXT: v_writelane_b32 v16, s39, 5 +; GFX11-NEXT: v_writelane_b32 v16, s48, 6 +; GFX11-NEXT: v_writelane_b32 v16, s49, 7 +; GFX11-NEXT: v_writelane_b32 v16, s50, 8 +; GFX11-NEXT: v_writelane_b32 v16, s51, 9 +; 
GFX11-NEXT: v_writelane_b32 v16, s52, 10 +; GFX11-NEXT: v_writelane_b32 v16, s53, 11 +; GFX11-NEXT: v_writelane_b32 v16, s54, 12 +; GFX11-NEXT: v_writelane_b32 v16, s55, 13 +; GFX11-NEXT: v_writelane_b32 v16, s64, 14 +; GFX11-NEXT: v_writelane_b32 v16, s65, 15 +; GFX11-NEXT: v_writelane_b32 v16, s66, 16 +; GFX11-NEXT: v_writelane_b32 v16, s67, 17 +; GFX11-NEXT: v_writelane_b32 v16, s68, 18 +; GFX11-NEXT: v_writelane_b32 v16, s69, 19 +; GFX11-NEXT: v_writelane_b32 v16, s70, 20 +; GFX11-NEXT: v_writelane_b32 v16, s71, 21 +; GFX11-NEXT: v_writelane_b32 v16, s80, 22 +; GFX11-NEXT: v_writelane_b32 v16, s81, 23 +; GFX11-NEXT: v_writelane_b32 v16, s82, 24 +; GFX11-NEXT: v_writelane_b32 v16, s83, 25 +; GFX11-NEXT: v_writelane_b32 v16, s84, 26 +; GFX11-NEXT: v_writelane_b32 v16, s85, 27 +; GFX11-NEXT: v_writelane_b32 v16, s86, 28 +; GFX11-NEXT: v_writelane_b32 v16, s87, 29 +; GFX11-NEXT: v_writelane_b32 v16, s96, 30 +; GFX11-NEXT: v_writelane_b32 v16, s97, 31 +; GFX11-NEXT: v_writelane_b32 v17, s98, 0 +; GFX11-NEXT: v_writelane_b32 v17, s99, 1 +; GFX11-NEXT: v_writelane_b32 v17, s100, 2 +; GFX11-NEXT: v_writelane_b32 v17, s101, 3 +; GFX11-NEXT: v_writelane_b32 v17, s102, 4 +; GFX11-NEXT: v_writelane_b32 v17, s103, 5 +; GFX11-NEXT: v_writelane_b32 v17, s104, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-NEXT: v_readfirstlane_b32 s40, v1 ; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v16, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s97, 1 ; GFX11-NEXT: v_readfirstlane_b32 s14, v3 ; GFX11-NEXT: v_readfirstlane_b32 s15, v4 ; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v16, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s98, 2 ; GFX11-NEXT: v_readfirstlane_b32 s13, v6 ; GFX11-NEXT: v_readfirstlane_b32 s10, v7 ; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v16, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s99, 3 ; 
GFX11-NEXT: v_readfirstlane_b32 s8, v9 ; GFX11-NEXT: v_readfirstlane_b32 s9, v10 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v16, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s100, 4 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: v_readfirstlane_b32 s4, v13 ; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v16, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s101, 5 ; GFX11-NEXT: s_mov_b32 s101, 0 ; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr19 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v16, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s102, 6 -; GFX11-NEXT: v_writelane_b32 v16, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s103, 7 -; GFX11-NEXT: v_writelane_b32 v16, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s104, 8 -; GFX11-NEXT: v_writelane_b32 v16, s49, 9 -; GFX11-NEXT: v_writelane_b32 v16, s50, 10 -; GFX11-NEXT: v_writelane_b32 v16, s51, 11 -; GFX11-NEXT: v_writelane_b32 v16, s52, 12 -; GFX11-NEXT: v_writelane_b32 v16, s53, 13 -; GFX11-NEXT: v_writelane_b32 v16, s54, 14 -; GFX11-NEXT: v_writelane_b32 v16, s55, 15 -; GFX11-NEXT: v_writelane_b32 v16, s64, 16 -; GFX11-NEXT: v_writelane_b32 v16, s65, 17 -; GFX11-NEXT: v_writelane_b32 v16, s66, 18 -; GFX11-NEXT: v_writelane_b32 v16, s67, 19 -; GFX11-NEXT: v_writelane_b32 v16, s68, 20 -; GFX11-NEXT: v_writelane_b32 v16, s69, 21 -; GFX11-NEXT: v_writelane_b32 v16, s70, 22 -; GFX11-NEXT: v_writelane_b32 v16, s71, 23 -; GFX11-NEXT: v_writelane_b32 v16, s80, 24 -; GFX11-NEXT: v_writelane_b32 v16, s81, 25 -; GFX11-NEXT: v_writelane_b32 v16, s82, 26 -; GFX11-NEXT: v_writelane_b32 v16, s83, 27 -; GFX11-NEXT: v_writelane_b32 v16, s84, 28 -; GFX11-NEXT: v_writelane_b32 v16, s85, 29 -; GFX11-NEXT: v_writelane_b32 v16, s86, 30 -; GFX11-NEXT: v_writelane_b32 v16, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB57_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 
s43, s25, 8 @@ -85225,9 +86146,9 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff ; GFX11-NEXT: v_readlane_b32 s17, v19, 29 ; GFX11-NEXT: s_and_b32 s16, s16, 0xff -; GFX11-NEXT: v_readlane_b32 s100, v17, 4 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s99, v17, 3 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 ; GFX11-NEXT: s_or_b32 s2, s2, s3 ; GFX11-NEXT: s_and_b32 s3, s18, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 16 @@ -85261,7 +86182,7 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s16, s16, s17 ; GFX11-NEXT: v_readlane_b32 s17, v19, 21 ; GFX11-NEXT: s_lshl_b32 s16, s16, 16 -; GFX11-NEXT: v_readlane_b32 s98, v17, 2 +; GFX11-NEXT: v_readlane_b32 s100, v17, 2 ; GFX11-NEXT: s_or_b32 s3, s3, s16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v7, s2 :: v_dual_mov_b32 v8, s3 @@ -85281,12 +86202,12 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_or_b32 s0, s0, s1 ; GFX11-NEXT: s_and_b32 s1, s21, 0xff ; GFX11-NEXT: s_lshl_b32 s2, s2, 8 -; GFX11-NEXT: v_readlane_b32 s86, v16, 30 +; GFX11-NEXT: v_readlane_b32 s99, v17, 1 ; GFX11-NEXT: s_or_b32 s1, s1, s2 ; GFX11-NEXT: v_readlane_b32 s2, v19, 25 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff -; GFX11-NEXT: v_readlane_b32 s31, v16, 1 -; GFX11-NEXT: v_readlane_b32 s30, v16, 0 +; GFX11-NEXT: v_readlane_b32 s98, v17, 0 +; GFX11-NEXT: v_readlane_b32 s86, v16, 28 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s2, s3 @@ -85323,9 +86244,9 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: s_lshl_b32 s3, s88, 8 ; GFX11-NEXT: s_and_b32 s16, s69, 0xff ; GFX11-NEXT: s_and_b32 s18, s72, 0xff -; GFX11-NEXT: v_readlane_b32 s97, 
v17, 1 +; GFX11-NEXT: v_readlane_b32 s97, v16, 31 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff -; GFX11-NEXT: v_readlane_b32 s69, v16, 21 +; GFX11-NEXT: v_readlane_b32 s69, v16, 19 ; GFX11-NEXT: s_or_b32 s1, s2, s3 ; GFX11-NEXT: v_readlane_b32 s3, v19, 16 ; GFX11-NEXT: s_and_b32 s2, s25, 0xff @@ -85532,39 +86453,39 @@ define inreg <128 x i8> @bitcast_v16i64_to_v128i8_scalar(<16 x i64> inreg %a, i3 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:112 -; GFX11-NEXT: v_readlane_b32 s104, v17, 8 -; GFX11-NEXT: v_readlane_b32 s103, v17, 7 -; GFX11-NEXT: v_readlane_b32 s102, v17, 6 -; GFX11-NEXT: v_readlane_b32 s101, v17, 5 -; GFX11-NEXT: v_readlane_b32 s96, v17, 0 -; GFX11-NEXT: v_readlane_b32 s87, v16, 31 -; GFX11-NEXT: v_readlane_b32 s85, v16, 29 -; GFX11-NEXT: v_readlane_b32 s84, v16, 28 -; GFX11-NEXT: v_readlane_b32 s83, v16, 27 -; GFX11-NEXT: v_readlane_b32 s82, v16, 26 -; GFX11-NEXT: v_readlane_b32 s81, v16, 25 -; GFX11-NEXT: v_readlane_b32 s80, v16, 24 -; GFX11-NEXT: v_readlane_b32 s71, v16, 23 -; GFX11-NEXT: v_readlane_b32 s70, v16, 22 -; GFX11-NEXT: v_readlane_b32 s68, v16, 20 -; GFX11-NEXT: v_readlane_b32 s67, v16, 19 -; GFX11-NEXT: v_readlane_b32 s66, v16, 18 -; GFX11-NEXT: v_readlane_b32 s65, v16, 17 -; GFX11-NEXT: v_readlane_b32 s64, v16, 16 -; GFX11-NEXT: v_readlane_b32 s55, v16, 15 -; GFX11-NEXT: v_readlane_b32 s54, v16, 14 -; GFX11-NEXT: v_readlane_b32 s53, v16, 13 -; GFX11-NEXT: v_readlane_b32 s52, v16, 12 -; GFX11-NEXT: v_readlane_b32 s51, v16, 11 -; GFX11-NEXT: v_readlane_b32 s50, v16, 10 -; GFX11-NEXT: v_readlane_b32 s49, v16, 9 -; GFX11-NEXT: v_readlane_b32 s48, v16, 8 -; GFX11-NEXT: v_readlane_b32 s39, v16, 7 -; GFX11-NEXT: v_readlane_b32 s38, v16, 6 -; GFX11-NEXT: v_readlane_b32 s37, v16, 5 -; GFX11-NEXT: v_readlane_b32 s36, v16, 4 -; GFX11-NEXT: v_readlane_b32 s35, v16, 3 -; GFX11-NEXT: v_readlane_b32 s34, v16, 2 +; GFX11-NEXT: 
v_readlane_b32 s104, v17, 6 +; GFX11-NEXT: v_readlane_b32 s103, v17, 5 +; GFX11-NEXT: v_readlane_b32 s102, v17, 4 +; GFX11-NEXT: v_readlane_b32 s101, v17, 3 +; GFX11-NEXT: v_readlane_b32 s96, v16, 30 +; GFX11-NEXT: v_readlane_b32 s87, v16, 29 +; GFX11-NEXT: v_readlane_b32 s85, v16, 27 +; GFX11-NEXT: v_readlane_b32 s84, v16, 26 +; GFX11-NEXT: v_readlane_b32 s83, v16, 25 +; GFX11-NEXT: v_readlane_b32 s82, v16, 24 +; GFX11-NEXT: v_readlane_b32 s81, v16, 23 +; GFX11-NEXT: v_readlane_b32 s80, v16, 22 +; GFX11-NEXT: v_readlane_b32 s71, v16, 21 +; GFX11-NEXT: v_readlane_b32 s70, v16, 20 +; GFX11-NEXT: v_readlane_b32 s68, v16, 18 +; GFX11-NEXT: v_readlane_b32 s67, v16, 17 +; GFX11-NEXT: v_readlane_b32 s66, v16, 16 +; GFX11-NEXT: v_readlane_b32 s65, v16, 15 +; GFX11-NEXT: v_readlane_b32 s64, v16, 14 +; GFX11-NEXT: v_readlane_b32 s55, v16, 13 +; GFX11-NEXT: v_readlane_b32 s54, v16, 12 +; GFX11-NEXT: v_readlane_b32 s53, v16, 11 +; GFX11-NEXT: v_readlane_b32 s52, v16, 10 +; GFX11-NEXT: v_readlane_b32 s51, v16, 9 +; GFX11-NEXT: v_readlane_b32 s50, v16, 8 +; GFX11-NEXT: v_readlane_b32 s49, v16, 7 +; GFX11-NEXT: v_readlane_b32 s48, v16, 6 +; GFX11-NEXT: v_readlane_b32 s39, v16, 5 +; GFX11-NEXT: v_readlane_b32 s38, v16, 4 +; GFX11-NEXT: v_readlane_b32 s37, v16, 3 +; GFX11-NEXT: v_readlane_b32 s36, v16, 2 +; GFX11-NEXT: v_readlane_b32 s35, v16, 1 +; GFX11-NEXT: v_readlane_b32 s34, v16, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v16, off, s32 @@ -90331,56 +91252,105 @@ define <16 x i64> @bitcast_v128i8_to_v16i64(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; 
meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta 
instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -94652,45 +95622,83 @@ define inreg <16 x i64> @bitcast_v128i8_to_v16i64_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -95593,45 +96601,83 @@ define inreg <16 x i64> @bitcast_v128i8_to_v16i64_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction 
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -97464,43 +98510,43 @@ define inreg <64 x bfloat> @bitcast_v16i64_to_v64bf16_scalar(<16 x i64> inreg %a ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, 
s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s70, v1 ; SI-NEXT: v_readfirstlane_b32 s71, v2 ; SI-NEXT: v_readfirstlane_b32 s80, v3 @@ -97921,43 +98967,43 @@ define inreg <64 x bfloat> @bitcast_v16i64_to_v64bf16_scalar(<16 x i64> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s4 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; 
SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: 
v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -100086,20 +101132,35 @@ define <16 x i64> @bitcast_v64bf16_to_v16i64(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -103135,81 +104196,149 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -104109,83 +105238,153 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 
+; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -105961,12 +107160,26 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 
offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s30, 2 +; SI-NEXT: v_writelane_b32 v63, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s35, 3 ; SI-NEXT: v_readfirstlane_b32 s46, v1 ; SI-NEXT: v_readfirstlane_b32 s47, v2 ; SI-NEXT: v_readfirstlane_b32 s44, v3 @@ -105986,21 +107199,6 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, 
off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB65_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -106242,15 +107440,15 @@ define inreg <64 x half> @bitcast_v16i64_to_v64f16_scalar(<16 x i64> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s35, v63, 3 +; SI-NEXT: v_readlane_b32 s30, v63, 2 ; SI-NEXT: v_lshlrev_b32_e32 v61, 16, v61 ; SI-NEXT: v_or_b32_e32 v2, v2, v61 ; SI-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v2, v59 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v2, vcc, 4, v0 @@ -108699,84 +109897,155 @@ define inreg <16 x i64> @bitcast_v64f16_to_v16i64_scalar(<64 x half> inreg %a, i ; GFX11-LABEL: bitcast_v64f16_to_v16i64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -109538,28 +110807,29 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: 
v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s30, 20 +; SI-NEXT: v_writelane_b32 v20, s31, 21 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -109579,7 +110849,6 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: v_writelane_b32 v20, s69, 21 ; SI-NEXT: s_cbranch_scc0 .LBB69_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 16 @@ -109898,29 +111167,29 @@ define inreg <64 x i16> @bitcast_v16i64_to_v64i16_scalar(<16 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 20 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: 
v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 21 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -111763,84 +113032,155 @@ define inreg <16 x i64> @bitcast_v64i16_to_v16i64_scalar(<64 x i16> inreg %a, i3 ; GFX11-LABEL: bitcast_v64i16_to_v16i64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 
offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, 
s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 
off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 
off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -115630,24 +116970,43 @@ define <128 x i8> @bitcast_v16f64_to_v128i8(<16 x double> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 
+; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -116321,44 +117680,56 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 
12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill 
+; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: v_readfirstlane_b32 s5, v2 ; SI-NEXT: v_readfirstlane_b32 s6, v3 @@ -116378,19 +117749,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s44, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s45, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB73_3 @@ -117476,42 +118834,42 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; SI-NEXT: v_and_b32_e32 v5, 0xff, v5 ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: 
v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: 
v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v26, 0xff, v26 ; SI-NEXT: v_lshlrev_b32_e32 v26, 16, v26 @@ -118039,39 +119397,53 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: 
v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s6, v1 ; VI-NEXT: v_readfirstlane_b32 s7, v2 ; VI-NEXT: v_readfirstlane_b32 s8, v3 @@ -118091,20 +119463,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: v_readfirstlane_b32 s4, v17 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v18 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, 
off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB73_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -118901,38 +120259,38 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; VI-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload ; VI-NEXT: v_or_b32_sdwa v32, v32, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: 
v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(2) ; VI-NEXT: v_lshlrev_b32_e32 v34, 8, v42 ; VI-NEXT: s_waitcnt vmcnt(0) @@ -119323,43 +120681,57 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: 
v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s6, v1 ; GFX9-NEXT: v_readfirstlane_b32 s7, v2 ; GFX9-NEXT: v_readfirstlane_b32 s8, v3 @@ -119379,20 
+120751,6 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v18 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB73_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -120215,42 +121573,42 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX9-NEXT: v_or_b32_sdwa v18, v38, v18 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v38, 8, v41 ; GFX9-NEXT: v_or_b32_sdwa v35, v35, v38 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 
s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: 
v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v36, 8, v36 @@ -120601,91 +121959,113 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:92 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v76, s30, 0 -; GFX11-NEXT: v_writelane_b32 v77, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s4, v1 -; GFX11-NEXT: v_readfirstlane_b32 s5, v2 -; GFX11-NEXT: v_writelane_b32 v76, s31, 1 -; GFX11-NEXT: v_writelane_b32 v77, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 -; 
GFX11-NEXT: v_readfirstlane_b32 s7, v4 -; GFX11-NEXT: v_readfirstlane_b32 s8, v5 -; GFX11-NEXT: v_writelane_b32 v76, s34, 2 -; GFX11-NEXT: v_writelane_b32 v77, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s9, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v76, s35, 3 -; GFX11-NEXT: v_writelane_b32 v77, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s12, v9 -; GFX11-NEXT: v_readfirstlane_b32 s13, v10 -; GFX11-NEXT: v_readfirstlane_b32 s14, v11 -; GFX11-NEXT: v_writelane_b32 v76, s36, 4 -; GFX11-NEXT: v_writelane_b32 v77, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s15, v12 -; GFX11-NEXT: v_readfirstlane_b32 s40, v13 -; GFX11-NEXT: v_readfirstlane_b32 s41, v14 -; GFX11-NEXT: v_writelane_b32 v76, s37, 5 -; GFX11-NEXT: v_writelane_b32 v77, s101, 5 -; GFX11-NEXT: s_mov_b32 vcc_hi, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x13 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:32 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 -; GFX11-NEXT: v_writelane_b32 v76, s38, 6 -; GFX11-NEXT: v_writelane_b32 v77, s102, 6 +; GFX11-NEXT: v_writelane_b32 v76, s34, 0 +; GFX11-NEXT: v_writelane_b32 v76, s35, 1 +; GFX11-NEXT: v_writelane_b32 v76, s36, 2 +; GFX11-NEXT: v_writelane_b32 v76, s37, 3 +; GFX11-NEXT: v_writelane_b32 v76, s38, 4 +; GFX11-NEXT: v_writelane_b32 v76, s39, 5 +; GFX11-NEXT: v_writelane_b32 v76, s48, 6 +; GFX11-NEXT: v_writelane_b32 v76, s49, 7 +; GFX11-NEXT: v_writelane_b32 v76, s50, 8 +; GFX11-NEXT: v_writelane_b32 v76, s51, 9 +; GFX11-NEXT: v_writelane_b32 v76, s52, 10 +; GFX11-NEXT: v_writelane_b32 v76, s53, 11 +; GFX11-NEXT: v_writelane_b32 v76, s54, 12 +; GFX11-NEXT: v_writelane_b32 v76, s55, 13 +; GFX11-NEXT: v_writelane_b32 v76, s64, 14 +; GFX11-NEXT: v_writelane_b32 v76, s65, 15 +; GFX11-NEXT: v_writelane_b32 v76, s66, 16 +; GFX11-NEXT: v_writelane_b32 v76, s67, 17 +; GFX11-NEXT: v_writelane_b32 v76, s68, 18 +; GFX11-NEXT: v_writelane_b32 v76, s69, 19 +; GFX11-NEXT: v_writelane_b32 v76, s70, 20 +; GFX11-NEXT: v_writelane_b32 v76, s71, 21 +; GFX11-NEXT: v_writelane_b32 v76, s80, 22 +; GFX11-NEXT: v_writelane_b32 v76, s81, 23 +; GFX11-NEXT: v_writelane_b32 v76, s82, 24 +; GFX11-NEXT: v_writelane_b32 v76, s83, 25 +; GFX11-NEXT: v_writelane_b32 v76, s84, 26 +; GFX11-NEXT: v_writelane_b32 v76, s85, 27 +; 
GFX11-NEXT: v_writelane_b32 v76, s86, 28 +; GFX11-NEXT: v_writelane_b32 v76, s87, 29 +; GFX11-NEXT: v_writelane_b32 v76, s96, 30 +; GFX11-NEXT: v_writelane_b32 v76, s97, 31 +; GFX11-NEXT: v_writelane_b32 v77, s98, 0 +; GFX11-NEXT: v_writelane_b32 v77, s99, 1 +; GFX11-NEXT: v_writelane_b32 v77, s100, 2 +; GFX11-NEXT: v_writelane_b32 v77, s101, 3 +; GFX11-NEXT: v_writelane_b32 v77, s102, 4 +; GFX11-NEXT: v_writelane_b32 v77, s103, 5 +; GFX11-NEXT: v_writelane_b32 v77, s104, 6 +; GFX11-NEXT: v_writelane_b32 v77, s30, 7 +; GFX11-NEXT: v_writelane_b32 v77, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s4, v1 +; GFX11-NEXT: v_readfirstlane_b32 s5, v2 +; GFX11-NEXT: v_readfirstlane_b32 s6, v3 +; GFX11-NEXT: v_readfirstlane_b32 s7, v4 +; GFX11-NEXT: v_readfirstlane_b32 s8, v5 +; GFX11-NEXT: v_readfirstlane_b32 s9, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s12, v9 +; GFX11-NEXT: v_readfirstlane_b32 s13, v10 +; GFX11-NEXT: v_readfirstlane_b32 s14, v11 +; GFX11-NEXT: v_readfirstlane_b32 s15, v12 +; GFX11-NEXT: v_readfirstlane_b32 s40, v13 +; GFX11-NEXT: v_readfirstlane_b32 s41, v14 +; GFX11-NEXT: s_mov_b32 vcc_hi, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr79 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v76, s39, 7 -; GFX11-NEXT: v_writelane_b32 v77, s103, 7 -; GFX11-NEXT: v_writelane_b32 v76, s48, 8 -; GFX11-NEXT: v_writelane_b32 v77, s104, 8 -; GFX11-NEXT: v_writelane_b32 v76, s49, 9 -; GFX11-NEXT: v_writelane_b32 v76, s50, 10 -; GFX11-NEXT: v_writelane_b32 v76, s51, 11 -; GFX11-NEXT: v_writelane_b32 v76, s52, 12 -; GFX11-NEXT: v_writelane_b32 v76, s53, 13 -; GFX11-NEXT: v_writelane_b32 v76, s54, 14 -; GFX11-NEXT: v_writelane_b32 v76, s55, 15 -; GFX11-NEXT: v_writelane_b32 v76, s64, 16 -; GFX11-NEXT: v_writelane_b32 v76, s65, 17 -; 
GFX11-NEXT: v_writelane_b32 v76, s66, 18 -; GFX11-NEXT: v_writelane_b32 v76, s67, 19 -; GFX11-NEXT: v_writelane_b32 v76, s68, 20 -; GFX11-NEXT: v_writelane_b32 v76, s69, 21 -; GFX11-NEXT: v_writelane_b32 v76, s70, 22 -; GFX11-NEXT: v_writelane_b32 v76, s71, 23 -; GFX11-NEXT: v_writelane_b32 v76, s80, 24 -; GFX11-NEXT: v_writelane_b32 v76, s81, 25 -; GFX11-NEXT: v_writelane_b32 v76, s82, 26 -; GFX11-NEXT: v_writelane_b32 v76, s83, 27 -; GFX11-NEXT: v_writelane_b32 v76, s84, 28 -; GFX11-NEXT: v_writelane_b32 v76, s85, 29 -; GFX11-NEXT: v_writelane_b32 v76, s86, 30 -; GFX11-NEXT: v_writelane_b32 v76, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB73_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s13, 16 @@ -121563,47 +122943,47 @@ define inreg <128 x i8> @bitcast_v16f64_to_v128i8_scalar(<16 x double> inreg %a, ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:72 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:76 -; GFX11-NEXT: v_readlane_b32 s104, v77, 8 -; GFX11-NEXT: v_readlane_b32 s103, v77, 7 -; GFX11-NEXT: v_readlane_b32 s102, v77, 6 -; GFX11-NEXT: v_readlane_b32 s101, v77, 5 -; GFX11-NEXT: v_readlane_b32 s100, v77, 4 -; GFX11-NEXT: v_readlane_b32 s99, v77, 3 -; GFX11-NEXT: v_readlane_b32 s98, v77, 2 -; GFX11-NEXT: v_readlane_b32 s97, v77, 1 -; GFX11-NEXT: v_readlane_b32 s96, v77, 0 -; GFX11-NEXT: v_readlane_b32 s87, v76, 31 -; GFX11-NEXT: v_readlane_b32 s86, v76, 30 -; GFX11-NEXT: v_readlane_b32 s85, v76, 29 -; GFX11-NEXT: v_readlane_b32 s84, v76, 28 -; GFX11-NEXT: v_readlane_b32 s83, v76, 27 -; GFX11-NEXT: v_readlane_b32 s82, v76, 26 -; GFX11-NEXT: v_readlane_b32 s81, v76, 25 -; GFX11-NEXT: v_readlane_b32 s80, v76, 24 -; GFX11-NEXT: v_readlane_b32 s71, v76, 23 -; GFX11-NEXT: v_readlane_b32 s70, v76, 22 -; GFX11-NEXT: v_readlane_b32 s69, v76, 21 -; GFX11-NEXT: v_readlane_b32 s68, v76, 20 -; GFX11-NEXT: v_readlane_b32 s67, v76, 19 -; GFX11-NEXT: v_readlane_b32 s66, v76, 18 -; GFX11-NEXT: 
v_readlane_b32 s65, v76, 17 -; GFX11-NEXT: v_readlane_b32 s64, v76, 16 -; GFX11-NEXT: v_readlane_b32 s55, v76, 15 -; GFX11-NEXT: v_readlane_b32 s54, v76, 14 -; GFX11-NEXT: v_readlane_b32 s53, v76, 13 -; GFX11-NEXT: v_readlane_b32 s52, v76, 12 -; GFX11-NEXT: v_readlane_b32 s51, v76, 11 -; GFX11-NEXT: v_readlane_b32 s50, v76, 10 -; GFX11-NEXT: v_readlane_b32 s49, v76, 9 -; GFX11-NEXT: v_readlane_b32 s48, v76, 8 -; GFX11-NEXT: v_readlane_b32 s39, v76, 7 -; GFX11-NEXT: v_readlane_b32 s38, v76, 6 -; GFX11-NEXT: v_readlane_b32 s37, v76, 5 -; GFX11-NEXT: v_readlane_b32 s36, v76, 4 -; GFX11-NEXT: v_readlane_b32 s35, v76, 3 -; GFX11-NEXT: v_readlane_b32 s34, v76, 2 -; GFX11-NEXT: v_readlane_b32 s31, v76, 1 -; GFX11-NEXT: v_readlane_b32 s30, v76, 0 +; GFX11-NEXT: v_readlane_b32 s30, v77, 7 +; GFX11-NEXT: v_readlane_b32 s31, v77, 8 +; GFX11-NEXT: v_readlane_b32 s104, v77, 6 +; GFX11-NEXT: v_readlane_b32 s103, v77, 5 +; GFX11-NEXT: v_readlane_b32 s102, v77, 4 +; GFX11-NEXT: v_readlane_b32 s101, v77, 3 +; GFX11-NEXT: v_readlane_b32 s100, v77, 2 +; GFX11-NEXT: v_readlane_b32 s99, v77, 1 +; GFX11-NEXT: v_readlane_b32 s98, v77, 0 +; GFX11-NEXT: v_readlane_b32 s97, v76, 31 +; GFX11-NEXT: v_readlane_b32 s96, v76, 30 +; GFX11-NEXT: v_readlane_b32 s87, v76, 29 +; GFX11-NEXT: v_readlane_b32 s86, v76, 28 +; GFX11-NEXT: v_readlane_b32 s85, v76, 27 +; GFX11-NEXT: v_readlane_b32 s84, v76, 26 +; GFX11-NEXT: v_readlane_b32 s83, v76, 25 +; GFX11-NEXT: v_readlane_b32 s82, v76, 24 +; GFX11-NEXT: v_readlane_b32 s81, v76, 23 +; GFX11-NEXT: v_readlane_b32 s80, v76, 22 +; GFX11-NEXT: v_readlane_b32 s71, v76, 21 +; GFX11-NEXT: v_readlane_b32 s70, v76, 20 +; GFX11-NEXT: v_readlane_b32 s69, v76, 19 +; GFX11-NEXT: v_readlane_b32 s68, v76, 18 +; GFX11-NEXT: v_readlane_b32 s67, v76, 17 +; GFX11-NEXT: v_readlane_b32 s66, v76, 16 +; GFX11-NEXT: v_readlane_b32 s65, v76, 15 +; GFX11-NEXT: v_readlane_b32 s64, v76, 14 +; GFX11-NEXT: v_readlane_b32 s55, v76, 13 +; GFX11-NEXT: v_readlane_b32 s54, v76, 12 +; 
GFX11-NEXT: v_readlane_b32 s53, v76, 11 +; GFX11-NEXT: v_readlane_b32 s52, v76, 10 +; GFX11-NEXT: v_readlane_b32 s51, v76, 9 +; GFX11-NEXT: v_readlane_b32 s50, v76, 8 +; GFX11-NEXT: v_readlane_b32 s49, v76, 7 +; GFX11-NEXT: v_readlane_b32 s48, v76, 6 +; GFX11-NEXT: v_readlane_b32 s39, v76, 5 +; GFX11-NEXT: v_readlane_b32 s38, v76, 4 +; GFX11-NEXT: v_readlane_b32 s37, v76, 3 +; GFX11-NEXT: v_readlane_b32 s36, v76, 2 +; GFX11-NEXT: v_readlane_b32 s35, v76, 1 +; GFX11-NEXT: v_readlane_b32 s34, v76, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:80 @@ -126370,56 +127750,105 @@ define <16 x double> @bitcast_v128i8_to_v16f64(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:592 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:588 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:584 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v59, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v94, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:468 ; GFX11-FAKE16-NEXT: s_clause 0x12 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -130691,45 +132120,83 @@ define inreg <16 x double> @bitcast_v128i8_to_v16f64_scalar(<128 x i8> inreg %a, ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-TRUE16-NEXT: s_clause 0x7 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -131632,45 +133099,83 @@ define inreg <16 x double> @bitcast_v128i8_to_v16f64_scalar(<128 x i8> inreg %a, ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:456 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:352 ; GFX11-FAKE16-NEXT: s_clause 0x7 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v28 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, v26 :: v_dual_mov_b32 v35, v24 @@ -133410,44 +134915,57 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: 
v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 ; SI-NEXT: v_readfirstlane_b32 s42, v3 @@ -133467,20 
+134985,6 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; SI-NEXT: v_readfirstlane_b32 s6, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s7, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: s_cbranch_scc0 .LBB77_3 ; SI-NEXT: ; %bb.1: ; %cmp.false @@ -133884,42 +135388,42 @@ define inreg <64 x bfloat> @bitcast_v16f64_to_v64bf16_scalar(<16 x double> inreg ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v8 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: 
v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: 
v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 @@ -136073,20 +137577,35 @@ define <16 x double> @bitcast_v64bf16_to_v16f64(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4 @@ -139122,81 +140641,149 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 
offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 
offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, 
s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 ; GFX11-TRUE16-NEXT: s_clause 0x6 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 
offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 @@ -140096,83 +141683,153 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16f64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v57, s32 offset:252 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v92, s32 offset:176 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 ; GFX11-FAKE16-NEXT: s_clause 0x8 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 @@ -141855,6 +143512,22 @@ define inreg <64 x half> @bitcast_v16f64_to_v64f16_scalar(<16 x double> inreg %a ; SI-LABEL: bitcast_v16f64_to_v64f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 
offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 @@ -141875,22 +143548,6 @@ define inreg <64 x half> @bitcast_v16f64_to_v64f16_scalar(<16 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v17 ; SI-NEXT: s_and_b64 s[46:47], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v18 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB81_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s46, s5, 16 @@ -144566,84 +146223,155 @@ define inreg <16 x double> @bitcast_v64f16_to_v16f64_scalar(<64 x half> inreg %a ; GFX11-LABEL: bitcast_v64f16_to_v16f64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; 
meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -145331,22 +147059,6 @@ define inreg <64 x i16> @bitcast_v16f64_to_v64i16_scalar(<16 x double> inreg %a, ; SI-LABEL: bitcast_v16f64_to_v64i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_mov_b32_e32 v31, s16 -; SI-NEXT: v_mov_b32_e32 v32, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; 
SI-NEXT: v_mov_b32_e32 v27, s20 -; SI-NEXT: v_mov_b32_e32 v28, s21 -; SI-NEXT: v_mov_b32_e32 v25, s22 -; SI-NEXT: v_mov_b32_e32 v26, s23 -; SI-NEXT: v_mov_b32_e32 v23, s24 -; SI-NEXT: v_mov_b32_e32 v24, s25 -; SI-NEXT: v_mov_b32_e32 v21, s26 -; SI-NEXT: v_mov_b32_e32 v22, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v19, s28 -; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -145363,6 +147075,22 @@ define inreg <64 x i16> @bitcast_v16f64_to_v64i16_scalar(<16 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 +; SI-NEXT: v_mov_b32_e32 v31, s16 +; SI-NEXT: v_mov_b32_e32 v32, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v27, s20 +; SI-NEXT: v_mov_b32_e32 v28, s21 +; SI-NEXT: v_mov_b32_e32 v25, s22 +; SI-NEXT: v_mov_b32_e32 v26, s23 +; SI-NEXT: v_mov_b32_e32 v23, s24 +; SI-NEXT: v_mov_b32_e32 v24, s25 +; SI-NEXT: v_mov_b32_e32 v21, s26 +; SI-NEXT: v_mov_b32_e32 v22, s27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_mov_b32_e32 v19, s28 +; SI-NEXT: v_mov_b32_e32 v20, s29 ; SI-NEXT: s_cbranch_scc0 .LBB85_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v18 @@ -147474,84 +149202,155 @@ define inreg <16 x double> @bitcast_v64i16_to_v16f64_scalar(<64 x i16> inreg %a, ; GFX11-LABEL: bitcast_v64i16_to_v16f64_scalar: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: 
scratch_store_b32 off, v40, s32 offset:292 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 ; GFX11-NEXT: s_clause 0x9 ; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 +; GFX11-NEXT: ; meta 
instruction ; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v185, s32 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 ; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 ; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 ; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 @@ -153080,53 +154879,99 @@ define <64 x bfloat> @bitcast_v128i8_to_v64bf16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -154019,6 +155864,43 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v41, s34, 0 +; SI-NEXT: v_writelane_b32 v41, s35, 1 +; SI-NEXT: v_writelane_b32 v41, s36, 2 +; SI-NEXT: v_writelane_b32 v41, s37, 3 +; SI-NEXT: v_writelane_b32 v41, s38, 4 +; SI-NEXT: v_writelane_b32 v41, s39, 5 +; SI-NEXT: v_writelane_b32 v41, s48, 6 +; SI-NEXT: v_writelane_b32 v41, s49, 7 +; SI-NEXT: v_writelane_b32 v41, s50, 8 +; SI-NEXT: v_writelane_b32 v41, s51, 9 +; SI-NEXT: v_writelane_b32 v41, s52, 10 +; SI-NEXT: v_writelane_b32 v41, s53, 11 +; SI-NEXT: v_writelane_b32 v41, s54, 12 +; SI-NEXT: v_writelane_b32 v41, s55, 13 +; SI-NEXT: v_writelane_b32 v41, s64, 14 +; SI-NEXT: v_writelane_b32 v41, s65, 15 +; SI-NEXT: v_writelane_b32 v41, s66, 16 +; SI-NEXT: v_writelane_b32 v41, s67, 17 +; SI-NEXT: v_writelane_b32 v41, s68, 18 +; SI-NEXT: v_writelane_b32 v41, s69, 19 +; SI-NEXT: v_writelane_b32 v41, s70, 20 +; SI-NEXT: v_writelane_b32 v41, s71, 21 +; SI-NEXT: v_writelane_b32 v41, s80, 22 +; SI-NEXT: v_writelane_b32 v41, s81, 23 +; SI-NEXT: v_writelane_b32 v41, s82, 24 +; SI-NEXT: v_writelane_b32 v41, s83, 25 +; SI-NEXT: v_writelane_b32 v41, s84, 26 +; SI-NEXT: v_writelane_b32 v41, s85, 27 +; SI-NEXT: v_writelane_b32 v41, s86, 28 +; SI-NEXT: v_writelane_b32 v41, s87, 29 +; SI-NEXT: v_writelane_b32 v41, s96, 30 +; SI-NEXT: v_writelane_b32 v41, s97, 31 +; SI-NEXT: v_writelane_b32 v41, s98, 32 
+; SI-NEXT: v_writelane_b32 v41, s99, 33 +; SI-NEXT: v_writelane_b32 v41, s30, 34 +; SI-NEXT: v_writelane_b32 v41, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:328 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 @@ -154035,44 +155917,8 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_writelane_b32 v43, s17, 2 ; SI-NEXT: v_writelane_b32 v43, s16, 3 ; SI-NEXT: s_mov_b32 s60, s24 -; SI-NEXT: v_writelane_b32 v41, s30, 0 -; SI-NEXT: v_writelane_b32 v41, s31, 1 -; SI-NEXT: v_writelane_b32 v41, s34, 2 -; SI-NEXT: v_writelane_b32 v41, s35, 3 -; SI-NEXT: v_writelane_b32 v41, s36, 4 -; SI-NEXT: v_writelane_b32 v41, s37, 5 -; SI-NEXT: v_writelane_b32 v41, s38, 6 -; SI-NEXT: v_writelane_b32 v41, s39, 7 -; SI-NEXT: v_writelane_b32 v41, s48, 8 -; SI-NEXT: v_writelane_b32 v41, s49, 9 -; SI-NEXT: v_writelane_b32 v41, s50, 10 -; SI-NEXT: v_writelane_b32 v41, s51, 11 -; SI-NEXT: v_writelane_b32 v41, s52, 12 -; SI-NEXT: v_writelane_b32 v41, s53, 13 -; SI-NEXT: v_writelane_b32 v41, s54, 14 -; SI-NEXT: v_writelane_b32 v41, s55, 15 -; SI-NEXT: v_writelane_b32 v41, s64, 16 -; SI-NEXT: v_writelane_b32 v41, s65, 17 -; SI-NEXT: v_writelane_b32 v41, s66, 18 -; SI-NEXT: v_writelane_b32 v41, s67, 19 -; SI-NEXT: v_writelane_b32 v41, s68, 20 -; SI-NEXT: v_writelane_b32 v41, s69, 21 -; SI-NEXT: v_writelane_b32 v41, s70, 22 -; SI-NEXT: v_writelane_b32 v41, s71, 23 ; SI-NEXT: s_mov_b32 s77, s28 ; SI-NEXT: s_mov_b32 s76, s27 -; SI-NEXT: v_writelane_b32 v41, s80, 24 -; SI-NEXT: v_writelane_b32 v41, s81, 25 -; SI-NEXT: v_writelane_b32 v41, s82, 26 -; SI-NEXT: v_writelane_b32 v41, s83, 27 -; SI-NEXT: v_writelane_b32 v41, s84, 28 -; SI-NEXT: v_writelane_b32 v41, s85, 29 -; SI-NEXT: v_writelane_b32 v41, s86, 30 -; SI-NEXT: v_writelane_b32 v41, s87, 31 -; SI-NEXT: v_writelane_b32 v41, s96, 32 -; SI-NEXT: v_writelane_b32 v41, s97, 33 -; SI-NEXT: v_writelane_b32 
v41, s98, 34 -; SI-NEXT: v_writelane_b32 v41, s99, 35 ; SI-NEXT: s_mov_b32 s79, s26 ; SI-NEXT: v_readfirstlane_b32 s38, v20 ; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane @@ -154102,6 +155948,17 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_readfirstlane_b32 s88, v4 ; SI-NEXT: v_readfirstlane_b32 s89, v3 ; SI-NEXT: v_readfirstlane_b32 s90, v9 +; SI-NEXT: v_readfirstlane_b32 s91, v10 +; SI-NEXT: v_readfirstlane_b32 s92, v8 +; SI-NEXT: v_readfirstlane_b32 s93, v7 +; SI-NEXT: v_readfirstlane_b32 s94, v13 +; SI-NEXT: v_readfirstlane_b32 s95, v14 +; SI-NEXT: v_readfirstlane_b32 s30, v17 +; SI-NEXT: v_readfirstlane_b32 s31, v18 +; SI-NEXT: v_readfirstlane_b32 s34, v16 +; SI-NEXT: v_readfirstlane_b32 s35, v15 +; SI-NEXT: v_readfirstlane_b32 s36, v21 +; SI-NEXT: v_readfirstlane_b32 s37, v22 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s6, v31 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:300 @@ -154137,17 +155994,6 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s4, v38 ; SI-NEXT: v_writelane_b32 v43, s4, 10 -; SI-NEXT: v_readfirstlane_b32 s91, v10 -; SI-NEXT: v_readfirstlane_b32 s92, v8 -; SI-NEXT: v_readfirstlane_b32 s93, v7 -; SI-NEXT: v_readfirstlane_b32 s94, v13 -; SI-NEXT: v_readfirstlane_b32 s95, v14 -; SI-NEXT: v_readfirstlane_b32 s30, v17 -; SI-NEXT: v_readfirstlane_b32 s31, v18 -; SI-NEXT: v_readfirstlane_b32 s34, v16 -; SI-NEXT: v_readfirstlane_b32 s35, v15 -; SI-NEXT: v_readfirstlane_b32 s36, v21 -; SI-NEXT: v_readfirstlane_b32 s37, v22 ; SI-NEXT: s_waitcnt vmcnt(11) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 11 @@ -155626,42 +157472,42 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; SI-NEXT: buffer_load_dword 
v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v41, 35 -; SI-NEXT: v_readlane_b32 s98, v41, 34 -; SI-NEXT: v_readlane_b32 s97, v41, 33 -; SI-NEXT: v_readlane_b32 s96, v41, 32 -; SI-NEXT: v_readlane_b32 s87, v41, 31 -; SI-NEXT: v_readlane_b32 s86, v41, 30 -; SI-NEXT: v_readlane_b32 s85, v41, 29 -; SI-NEXT: v_readlane_b32 s84, v41, 28 -; SI-NEXT: v_readlane_b32 s83, v41, 27 -; SI-NEXT: v_readlane_b32 s82, v41, 26 -; SI-NEXT: v_readlane_b32 s81, v41, 25 -; SI-NEXT: v_readlane_b32 s80, v41, 24 -; SI-NEXT: v_readlane_b32 s71, v41, 23 -; SI-NEXT: v_readlane_b32 s70, v41, 22 -; SI-NEXT: v_readlane_b32 s69, v41, 21 -; SI-NEXT: v_readlane_b32 s68, v41, 20 -; SI-NEXT: v_readlane_b32 s67, v41, 19 -; SI-NEXT: v_readlane_b32 s66, v41, 18 -; SI-NEXT: v_readlane_b32 s65, v41, 17 -; SI-NEXT: v_readlane_b32 s64, v41, 16 -; SI-NEXT: v_readlane_b32 s55, v41, 15 -; SI-NEXT: v_readlane_b32 s54, v41, 14 -; SI-NEXT: v_readlane_b32 s53, v41, 13 -; SI-NEXT: v_readlane_b32 s52, v41, 12 -; SI-NEXT: v_readlane_b32 s51, v41, 11 -; SI-NEXT: v_readlane_b32 s50, v41, 10 -; SI-NEXT: v_readlane_b32 s49, v41, 9 -; SI-NEXT: v_readlane_b32 s48, v41, 8 -; SI-NEXT: v_readlane_b32 s39, v41, 7 -; SI-NEXT: v_readlane_b32 s38, v41, 6 -; SI-NEXT: v_readlane_b32 s37, v41, 5 -; SI-NEXT: v_readlane_b32 s36, v41, 4 -; SI-NEXT: v_readlane_b32 s35, v41, 3 -; SI-NEXT: v_readlane_b32 s34, v41, 2 -; SI-NEXT: v_readlane_b32 s31, v41, 1 -; SI-NEXT: v_readlane_b32 s30, v41, 0 +; SI-NEXT: v_readlane_b32 s30, v41, 34 +; SI-NEXT: v_readlane_b32 s31, v41, 35 +; SI-NEXT: v_readlane_b32 s99, v41, 33 +; SI-NEXT: v_readlane_b32 s98, v41, 32 +; SI-NEXT: v_readlane_b32 s97, v41, 31 +; SI-NEXT: v_readlane_b32 s96, v41, 30 +; SI-NEXT: v_readlane_b32 s87, v41, 29 +; SI-NEXT: v_readlane_b32 s86, v41, 28 +; SI-NEXT: v_readlane_b32 s85, v41, 27 +; SI-NEXT: v_readlane_b32 s84, v41, 26 +; SI-NEXT: v_readlane_b32 s83, v41, 25 +; SI-NEXT: v_readlane_b32 s82, v41, 24 +; SI-NEXT: v_readlane_b32 s81, 
v41, 23 +; SI-NEXT: v_readlane_b32 s80, v41, 22 +; SI-NEXT: v_readlane_b32 s71, v41, 21 +; SI-NEXT: v_readlane_b32 s70, v41, 20 +; SI-NEXT: v_readlane_b32 s69, v41, 19 +; SI-NEXT: v_readlane_b32 s68, v41, 18 +; SI-NEXT: v_readlane_b32 s67, v41, 17 +; SI-NEXT: v_readlane_b32 s66, v41, 16 +; SI-NEXT: v_readlane_b32 s65, v41, 15 +; SI-NEXT: v_readlane_b32 s64, v41, 14 +; SI-NEXT: v_readlane_b32 s55, v41, 13 +; SI-NEXT: v_readlane_b32 s54, v41, 12 +; SI-NEXT: v_readlane_b32 s53, v41, 11 +; SI-NEXT: v_readlane_b32 s52, v41, 10 +; SI-NEXT: v_readlane_b32 s51, v41, 9 +; SI-NEXT: v_readlane_b32 s50, v41, 8 +; SI-NEXT: v_readlane_b32 s49, v41, 7 +; SI-NEXT: v_readlane_b32 s48, v41, 6 +; SI-NEXT: v_readlane_b32 s39, v41, 5 +; SI-NEXT: v_readlane_b32 s38, v41, 4 +; SI-NEXT: v_readlane_b32 s37, v41, 3 +; SI-NEXT: v_readlane_b32 s36, v41, 2 +; SI-NEXT: v_readlane_b32 s35, v41, 1 +; SI-NEXT: v_readlane_b32 s34, v41, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload @@ -157861,35 +159707,65 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -158633,35 +160509,65 @@ define inreg <64 x bfloat> @bitcast_v128i8_to_v64bf16_scalar(<128 x i8> inreg %a ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -164681,65 +166587,123 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v62, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:124 ; GFX11-TRUE16-NEXT: s_clause 0x1b ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v105, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v140, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:12 ; GFX11-TRUE16-NEXT: s_clause 0x2 ; GFX11-TRUE16-NEXT: scratch_load_b32 v31, off, s32 offset:8 @@ -165848,26 +167812,47 @@ define <128 x i8> @bitcast_v64bf16_to_v128i8(<64 x bfloat> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x15 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:96 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:92 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -167055,6 +169040,42 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, 
s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 @@ -167076,62 +169097,26 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:64 ; SI-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:72 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: 
v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 ; SI-NEXT: s_waitcnt expcnt(5) ; SI-NEXT: v_mul_f32_e32 v56, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v10 -; SI-NEXT: v_writelane_b32 v63, s84, 28 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v9 -; SI-NEXT: v_writelane_b32 v63, s85, 29 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v14 -; SI-NEXT: v_writelane_b32 v63, s86, 30 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v15 -; SI-NEXT: v_writelane_b32 v63, s87, 31 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v18 -; SI-NEXT: v_writelane_b32 v63, s96, 32 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v26 -; SI-NEXT: v_writelane_b32 v63, s97, 33 ; SI-NEXT: buffer_store_dword v2, 
off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; SI-NEXT: v_writelane_b32 v63, s98, 34 ; SI-NEXT: v_mov_b32_e32 v46, v21 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_mul_f32_e32 v47, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v32, 1.0, v4 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v3 @@ -167141,8 +169126,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v61, 1.0, v7 ; SI-NEXT: v_mul_f32_e32 v5, 1.0, v12 ; SI-NEXT: v_mul_f32_e32 v60, 1.0, v11 -; SI-NEXT: s_waitcnt vmcnt(14) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: v_mul_f32_e32 v13, 1.0, v13 ; SI-NEXT: v_mul_f32_e32 v21, 1.0, v16 ; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17 @@ -167150,18 +169133,29 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v12, 1.0, v19 ; SI-NEXT: v_mul_f32_e32 v22, 1.0, v22 ; SI-NEXT: v_mul_f32_e32 v20, 1.0, v46 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v48 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; SI-NEXT: v_mul_f32_e32 v24, 1.0, v24 ; SI-NEXT: v_mul_f32_e32 v46, 1.0, v23 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v52 ; SI-NEXT: v_mul_f32_e32 v26, 1.0, v25 ; SI-NEXT: v_mul_f32_e32 v57, 1.0, v28 ; SI-NEXT: v_mul_f32_e32 v16, 1.0, v27 ; SI-NEXT: v_mul_f32_e32 v28, 1.0, v30 ; SI-NEXT: v_mul_f32_e32 v30, 1.0, v29 +; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16 +; SI-NEXT: v_mul_f32_e64 v3, 1.0, s19 +; SI-NEXT: v_mul_f32_e64 v4, 1.0, s18 +; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21 +; SI-NEXT: v_mul_f32_e64 v15, 1.0, s20 +; SI-NEXT: v_mul_f32_e64 v7, 1.0, s23 +; SI-NEXT: v_mul_f32_e64 v6, 1.0, s22 +; SI-NEXT: v_mul_f32_e64 v18, 1.0, s25 +; SI-NEXT: v_mul_f32_e64 v19, 1.0, s24 +; SI-NEXT: v_mul_f32_e64 v10, 1.0, s27 +; SI-NEXT: v_mul_f32_e64 v8, 1.0, s26 +; SI-NEXT: v_mul_f32_e64 v23, 1.0, s29 +; SI-NEXT: v_mul_f32_e64 v25, 1.0, s28 +; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane +; 
SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: v_mul_f32_e32 v31, 1.0, v33 ; SI-NEXT: v_mul_f32_e32 v27, 1.0, v34 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec @@ -167169,8 +169163,13 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v36, 1.0, v36 ; SI-NEXT: v_mul_f32_e32 v35, 1.0, v37 ; SI-NEXT: v_mul_f32_e32 v34, 1.0, v38 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v48 +; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill ; SI-NEXT: v_mul_f32_e32 v37, 1.0, v39 ; SI-NEXT: v_mul_f32_e32 v48, 1.0, v49 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_mul_f32_e32 v2, 1.0, v52 ; SI-NEXT: v_mul_f32_e32 v39, 1.0, v50 ; SI-NEXT: v_mul_f32_e32 v33, 1.0, v51 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill @@ -167191,20 +169190,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_mul_f32_e32 v53, 1.0, v45 ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s17 -; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16 -; SI-NEXT: v_mul_f32_e64 v3, 1.0, s19 -; SI-NEXT: v_mul_f32_e64 v4, 1.0, s18 -; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21 -; SI-NEXT: v_mul_f32_e64 v15, 1.0, s20 -; SI-NEXT: v_mul_f32_e64 v7, 1.0, s23 -; SI-NEXT: v_mul_f32_e64 v6, 1.0, s22 -; SI-NEXT: v_mul_f32_e64 v18, 1.0, s25 -; SI-NEXT: v_mul_f32_e64 v19, 1.0, s24 -; SI-NEXT: v_mul_f32_e64 v10, 1.0, s27 -; SI-NEXT: v_mul_f32_e64 v8, 1.0, s26 -; SI-NEXT: v_mul_f32_e64 v23, 1.0, s29 -; SI-NEXT: v_mul_f32_e64 v25, 1.0, s28 -; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill @@ -168240,24 +170225,23 @@ define inreg <128 x i8> 
@bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: s_lshl_b32 s4, s4, 8 ; SI-NEXT: v_readlane_b32 s7, v62, 1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s31, v63, 1 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s35, v63, 1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 ; SI-NEXT: v_or_b32_e32 v1, s5, v1 @@ -168467,7 +170451,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s34, 8 ; SI-NEXT: s_lshl_b32 s6, s90, 24 -; SI-NEXT: v_readlane_b32 s34, v63, 2 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: 
v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -168498,8 +170482,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s38, 8 ; SI-NEXT: s_lshl_b32 s6, s30, 24 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s30, v63, 34 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s38, v63, 4 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v3, 0xff, v3 ; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 @@ -168527,9 +170512,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: s_lshl_b32 s5, s52, 8 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s6, s48, 24 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s36, v63, 4 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s36, v63, 2 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 @@ -168562,9 +170547,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s68, 8 ; SI-NEXT: s_lshl_b32 s6, s54, 24 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s50, v63, 10 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s50, v63, 8 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 @@ -168599,9 +170584,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; SI-NEXT: s_lshl_b32 s5, s82, 8 ; SI-NEXT: s_lshl_b32 s6, s66, 24 
-; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s64, v63, 16 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s64, v63, 14 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_and_b32_e32 v2, 0xff, v2 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 @@ -168629,9 +170614,9 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s96, 8 ; SI-NEXT: s_lshl_b32 s6, s80, 24 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s70, v63, 22 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s70, v63, 20 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ -168663,8 +170648,8 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: v_and_b32_e32 v1, 0xff, v46 ; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload ; SI-NEXT: s_lshl_b32 s5, s86, 24 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s84, v63, 28 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s84, v63, 26 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ -168696,7 +170681,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_and_b32_e32 v1, 0xff, v38 -; SI-NEXT: v_readlane_b32 s98, v63, 34 +; SI-NEXT: v_readlane_b32 s98, v63, 32 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 @@ -168736,39 +170721,53 @@ define inreg <128 x i8> 
@bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 
offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 
31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 ; VI-NEXT: v_readfirstlane_b32 s42, v5 @@ -168788,20 +170787,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB91_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -170160,38 +172145,38 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_lshlrev_b32_e32 v36, 8, v33 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload ; VI-NEXT: v_or_b32_sdwa v17, 
v17, v36 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: 
v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v36, 8, v33 ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload @@ -170590,43 +172575,57 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: 
v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, 
s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s76, v3 ; GFX9-NEXT: v_readfirstlane_b32 s77, v4 ; GFX9-NEXT: v_readfirstlane_b32 s74, v5 @@ -170646,20 +172645,6 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], 
s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -172043,42 +174028,42 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_or_b32_sdwa v2, v44, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX9-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: 
v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; 
GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_or_b32_sdwa v7, v30, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v7, v7, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -172196,70 +174181,73 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s96, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s52, 10 +; 
GFX11-TRUE16-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s54, 12 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s64, 14 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s65, 15 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s66, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s67, 17 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s68, 18 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s69, 19 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s70, 20 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s71, 21 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s80, 22 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s81, 23 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s82, 24 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s83, 25 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s84, 26 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s85, 27 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s86, 28 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s87, 29 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s96, 30 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s97, 31 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s98, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s99, 1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s100, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s101, 3 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s102, 4 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s103, 5 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s104, 6 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s30, 7 +; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s31, 8 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s72, v1 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s73, v2 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s97, 1 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s62, v3 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s63, v4 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s60, v5 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-TRUE16-NEXT: 
v_writelane_b32 v41, s98, 2 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s61, v6 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s58, v7 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s59, v8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s99, 3 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s46, v9 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s47, v10 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s44, v11 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s100, 4 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s45, v12 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s42, v13 ; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s43, v14 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s101, 5 ; GFX11-TRUE16-NEXT: s_mov_b32 vcc_hi, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s4, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s102, 6 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s103, 7 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v41, s104, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s49, 9 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s55, 15 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s64, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s65, 17 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s66, 18 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s67, 19 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s68, 20 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s69, 21 -; GFX11-TRUE16-NEXT: 
v_writelane_b32 v40, s70, 22 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s71, 23 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s80, 24 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s81, 25 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s82, 26 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s83, 27 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s84, 28 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s85, 29 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s86, 30 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s87, 31 ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s27, 24 @@ -173691,6 +175679,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff, v10 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v14, v8, v9 +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v41, 7 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v15, v17, v2 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v16, v16, v19 ; GFX11-TRUE16-NEXT: v_or_b32_e32 v17, v18, v1 @@ -173702,47 +175691,46 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[4:7], off offset:80 ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[11:14], off offset:96 ; GFX11-TRUE16-NEXT: scratch_store_b128 v0, v[15:18], off offset:112 -; GFX11-TRUE16-NEXT: v_readlane_b32 s104, v41, 8 -; GFX11-TRUE16-NEXT: v_readlane_b32 s103, v41, 7 -; GFX11-TRUE16-NEXT: v_readlane_b32 s102, v41, 6 -; GFX11-TRUE16-NEXT: v_readlane_b32 s101, v41, 5 -; GFX11-TRUE16-NEXT: v_readlane_b32 s100, v41, 4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s99, v41, 3 -; GFX11-TRUE16-NEXT: v_readlane_b32 s98, v41, 2 -; GFX11-TRUE16-NEXT: v_readlane_b32 s97, v41, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s96, v41, 0 -; GFX11-TRUE16-NEXT: v_readlane_b32 s87, v40, 31 -; GFX11-TRUE16-NEXT: v_readlane_b32 s86, v40, 30 -; GFX11-TRUE16-NEXT: v_readlane_b32 s85, v40, 29 -; GFX11-TRUE16-NEXT: 
v_readlane_b32 s84, v40, 28 -; GFX11-TRUE16-NEXT: v_readlane_b32 s83, v40, 27 -; GFX11-TRUE16-NEXT: v_readlane_b32 s82, v40, 26 -; GFX11-TRUE16-NEXT: v_readlane_b32 s81, v40, 25 -; GFX11-TRUE16-NEXT: v_readlane_b32 s80, v40, 24 -; GFX11-TRUE16-NEXT: v_readlane_b32 s71, v40, 23 -; GFX11-TRUE16-NEXT: v_readlane_b32 s70, v40, 22 -; GFX11-TRUE16-NEXT: v_readlane_b32 s69, v40, 21 -; GFX11-TRUE16-NEXT: v_readlane_b32 s68, v40, 20 -; GFX11-TRUE16-NEXT: v_readlane_b32 s67, v40, 19 -; GFX11-TRUE16-NEXT: v_readlane_b32 s66, v40, 18 -; GFX11-TRUE16-NEXT: v_readlane_b32 s65, v40, 17 -; GFX11-TRUE16-NEXT: v_readlane_b32 s64, v40, 16 -; GFX11-TRUE16-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-TRUE16-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-TRUE16-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-TRUE16-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-TRUE16-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-TRUE16-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-TRUE16-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-TRUE16-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-TRUE16-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-TRUE16-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-TRUE16-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-TRUE16-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v41, 8 +; GFX11-TRUE16-NEXT: v_readlane_b32 s104, v41, 6 +; GFX11-TRUE16-NEXT: v_readlane_b32 s103, v41, 5 +; GFX11-TRUE16-NEXT: v_readlane_b32 s102, v41, 4 +; GFX11-TRUE16-NEXT: v_readlane_b32 s101, v41, 3 +; GFX11-TRUE16-NEXT: v_readlane_b32 s100, v41, 2 +; GFX11-TRUE16-NEXT: v_readlane_b32 s99, v41, 1 +; GFX11-TRUE16-NEXT: v_readlane_b32 s98, v41, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s97, v40, 31 +; GFX11-TRUE16-NEXT: v_readlane_b32 s96, v40, 30 +; GFX11-TRUE16-NEXT: v_readlane_b32 s87, v40, 29 +; GFX11-TRUE16-NEXT: 
v_readlane_b32 s86, v40, 28 +; GFX11-TRUE16-NEXT: v_readlane_b32 s85, v40, 27 +; GFX11-TRUE16-NEXT: v_readlane_b32 s84, v40, 26 +; GFX11-TRUE16-NEXT: v_readlane_b32 s83, v40, 25 +; GFX11-TRUE16-NEXT: v_readlane_b32 s82, v40, 24 +; GFX11-TRUE16-NEXT: v_readlane_b32 s81, v40, 23 +; GFX11-TRUE16-NEXT: v_readlane_b32 s80, v40, 22 +; GFX11-TRUE16-NEXT: v_readlane_b32 s71, v40, 21 +; GFX11-TRUE16-NEXT: v_readlane_b32 s70, v40, 20 +; GFX11-TRUE16-NEXT: v_readlane_b32 s69, v40, 19 +; GFX11-TRUE16-NEXT: v_readlane_b32 s68, v40, 18 +; GFX11-TRUE16-NEXT: v_readlane_b32 s67, v40, 17 +; GFX11-TRUE16-NEXT: v_readlane_b32 s66, v40, 16 +; GFX11-TRUE16-NEXT: v_readlane_b32 s65, v40, 15 +; GFX11-TRUE16-NEXT: v_readlane_b32 s64, v40, 14 +; GFX11-TRUE16-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-TRUE16-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-TRUE16-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-TRUE16-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-TRUE16-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-TRUE16-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-TRUE16-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-TRUE16-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-TRUE16-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-TRUE16-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-TRUE16-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-TRUE16-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-TRUE16-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-TRUE16-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 @@ -173759,70 +175747,73 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:8 
+; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:12 ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s96, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s52, 10 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s54, 12 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s64, 14 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s65, 15 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s66, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s67, 17 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s68, 18 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s69, 19 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s70, 20 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s71, 21 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s80, 22 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s81, 23 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s82, 24 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s83, 25 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s84, 26 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s85, 27 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s86, 28 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s87, 29 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s96, 30 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s97, 31 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s98, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s99, 1 +; GFX11-FAKE16-NEXT: 
v_writelane_b32 v41, s100, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s101, 3 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s102, 4 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s103, 5 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s104, 6 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s30, 7 +; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s31, 8 ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s72, v1 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s73, v2 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s97, 1 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s62, v3 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s63, v4 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s60, v5 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s98, 2 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s61, v6 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s58, v7 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s59, v8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s99, 3 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s56, v9 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s57, v10 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s46, v11 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s100, 4 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s47, v12 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s44, v13 ; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s45, v14 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s101, 5 ; GFX11-FAKE16-NEXT: s_mov_b32 vcc_hi, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s4, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s102, 6 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-FAKE16-NEXT: 
v_writelane_b32 v41, s103, 7 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v41, s104, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s49, 9 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s55, 15 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s64, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s65, 17 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s66, 18 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s67, 19 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s68, 20 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s69, 21 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s70, 22 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s71, 23 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s80, 24 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s81, 25 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s82, 26 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s83, 27 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s84, 28 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s85, 29 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s86, 30 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s87, 31 ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB91_3 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false ; GFX11-FAKE16-NEXT: s_lshr_b32 s4, s27, 24 @@ -175261,6 +177252,7 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, v3, v4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v41, 7 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v10, v2 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, v11, v18 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v17, v19 @@ -175272,47 +177264,46 @@ define inreg <128 x i8> @bitcast_v64bf16_to_v128i8_scalar(<64 x bfloat> inreg %a ; 
GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:80 ; GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[13:16], off offset:96 ; GFX11-FAKE16-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 -; GFX11-FAKE16-NEXT: v_readlane_b32 s104, v41, 8 -; GFX11-FAKE16-NEXT: v_readlane_b32 s103, v41, 7 -; GFX11-FAKE16-NEXT: v_readlane_b32 s102, v41, 6 -; GFX11-FAKE16-NEXT: v_readlane_b32 s101, v41, 5 -; GFX11-FAKE16-NEXT: v_readlane_b32 s100, v41, 4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s99, v41, 3 -; GFX11-FAKE16-NEXT: v_readlane_b32 s98, v41, 2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s97, v41, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s96, v41, 0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s87, v40, 31 -; GFX11-FAKE16-NEXT: v_readlane_b32 s86, v40, 30 -; GFX11-FAKE16-NEXT: v_readlane_b32 s85, v40, 29 -; GFX11-FAKE16-NEXT: v_readlane_b32 s84, v40, 28 -; GFX11-FAKE16-NEXT: v_readlane_b32 s83, v40, 27 -; GFX11-FAKE16-NEXT: v_readlane_b32 s82, v40, 26 -; GFX11-FAKE16-NEXT: v_readlane_b32 s81, v40, 25 -; GFX11-FAKE16-NEXT: v_readlane_b32 s80, v40, 24 -; GFX11-FAKE16-NEXT: v_readlane_b32 s71, v40, 23 -; GFX11-FAKE16-NEXT: v_readlane_b32 s70, v40, 22 -; GFX11-FAKE16-NEXT: v_readlane_b32 s69, v40, 21 -; GFX11-FAKE16-NEXT: v_readlane_b32 s68, v40, 20 -; GFX11-FAKE16-NEXT: v_readlane_b32 s67, v40, 19 -; GFX11-FAKE16-NEXT: v_readlane_b32 s66, v40, 18 -; GFX11-FAKE16-NEXT: v_readlane_b32 s65, v40, 17 -; GFX11-FAKE16-NEXT: v_readlane_b32 s64, v40, 16 -; GFX11-FAKE16-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-FAKE16-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-FAKE16-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-FAKE16-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-FAKE16-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-FAKE16-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-FAKE16-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-FAKE16-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-FAKE16-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-FAKE16-NEXT: v_readlane_b32 s37, v40, 5 -; 
GFX11-FAKE16-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-FAKE16-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v41, 8 +; GFX11-FAKE16-NEXT: v_readlane_b32 s104, v41, 6 +; GFX11-FAKE16-NEXT: v_readlane_b32 s103, v41, 5 +; GFX11-FAKE16-NEXT: v_readlane_b32 s102, v41, 4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s101, v41, 3 +; GFX11-FAKE16-NEXT: v_readlane_b32 s100, v41, 2 +; GFX11-FAKE16-NEXT: v_readlane_b32 s99, v41, 1 +; GFX11-FAKE16-NEXT: v_readlane_b32 s98, v41, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s97, v40, 31 +; GFX11-FAKE16-NEXT: v_readlane_b32 s96, v40, 30 +; GFX11-FAKE16-NEXT: v_readlane_b32 s87, v40, 29 +; GFX11-FAKE16-NEXT: v_readlane_b32 s86, v40, 28 +; GFX11-FAKE16-NEXT: v_readlane_b32 s85, v40, 27 +; GFX11-FAKE16-NEXT: v_readlane_b32 s84, v40, 26 +; GFX11-FAKE16-NEXT: v_readlane_b32 s83, v40, 25 +; GFX11-FAKE16-NEXT: v_readlane_b32 s82, v40, 24 +; GFX11-FAKE16-NEXT: v_readlane_b32 s81, v40, 23 +; GFX11-FAKE16-NEXT: v_readlane_b32 s80, v40, 22 +; GFX11-FAKE16-NEXT: v_readlane_b32 s71, v40, 21 +; GFX11-FAKE16-NEXT: v_readlane_b32 s70, v40, 20 +; GFX11-FAKE16-NEXT: v_readlane_b32 s69, v40, 19 +; GFX11-FAKE16-NEXT: v_readlane_b32 s68, v40, 18 +; GFX11-FAKE16-NEXT: v_readlane_b32 s67, v40, 17 +; GFX11-FAKE16-NEXT: v_readlane_b32 s66, v40, 16 +; GFX11-FAKE16-NEXT: v_readlane_b32 s65, v40, 15 +; GFX11-FAKE16-NEXT: v_readlane_b32 s64, v40, 14 +; GFX11-FAKE16-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-FAKE16-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-FAKE16-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-FAKE16-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-FAKE16-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-FAKE16-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-FAKE16-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-FAKE16-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-FAKE16-NEXT: v_readlane_b32 s39, v40, 5 +; 
GFX11-FAKE16-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-FAKE16-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-FAKE16-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-FAKE16-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-FAKE16-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 @@ -180797,53 +182788,99 @@ define <64 x half> @bitcast_v128i8_to_v64f16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -181735,60 +183772,71 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:416 ; 4-byte 
Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: 
v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: ; implicit-def: $vgpr61 : SGPR spill to VGPR lane ; SI-NEXT: s_mov_b32 s10, s16 -; SI-NEXT: s_waitcnt expcnt(1) ; SI-NEXT: v_writelane_b32 v61, s29, 0 ; SI-NEXT: v_writelane_b32 v61, s28, 1 ; SI-NEXT: v_writelane_b32 v61, s27, 2 ; SI-NEXT: s_mov_b32 s61, s21 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; SI-NEXT: v_writelane_b32 v63, s48, 8 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; 
SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_writelane_b32 v63, s97, 33 ; SI-NEXT: s_mov_b32 s67, s19 ; SI-NEXT: s_mov_b32 s54, s17 ; SI-NEXT: s_mov_b32 s35, s23 ; SI-NEXT: s_mov_b32 s39, s26 ; SI-NEXT: s_mov_b32 s62, s25 -; SI-NEXT: v_writelane_b32 v63, s98, 34 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s99, v1 ; SI-NEXT: v_readfirstlane_b32 s74, v24 ; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s6, v23 -; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_writelane_b32 v62, s74, 0 ; SI-NEXT: v_readfirstlane_b32 s12, v26 ; SI-NEXT: v_writelane_b32 v62, s6, 1 @@ -181819,10 +183867,6 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s42, v20 ; SI-NEXT: v_readfirstlane_b32 s43, v19 ; SI-NEXT: v_readfirstlane_b32 s44, v22 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_readfirstlane_b32 s4, v31 -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:328 -; SI-NEXT: v_writelane_b32 v61, s4, 3 ; SI-NEXT: v_readfirstlane_b32 s45, v21 ; SI-NEXT: v_readfirstlane_b32 s98, v10 ; SI-NEXT: v_readfirstlane_b32 s90, v8 @@ -181830,28 +183874,19 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s91, v6 ; SI-NEXT: v_readfirstlane_b32 s93, v4 ; SI-NEXT: v_readfirstlane_b32 s55, v2 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:376 ; 
4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:336 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_readfirstlane_b32 s4, v31 -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:324 -; SI-NEXT: v_writelane_b32 v61, s4, 4 +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:328 +; SI-NEXT: v_writelane_b32 v61, s4, 3 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_readfirstlane_b32 s4, v31 +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:324 +; SI-NEXT: v_writelane_b32 v61, s4, 4 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:320 ; SI-NEXT: v_writelane_b32 v61, s4, 5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -182980,7 +185015,7 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NEXT: v_cvt_f16_f32_e32 v6, v6 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_readlane_b32 s99, v63, 35 +; SI-NEXT: v_readlane_b32 s30, v63, 34 ; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; SI-NEXT: v_or_b32_e32 v5, v6, v5 ; SI-NEXT: 
buffer_store_dword v5, v0, s[0:3], 0 offen @@ -182988,41 +185023,41 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; 
SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_waitcnt vmcnt(1) ; SI-NEXT: v_cvt_f16_f32_e32 v5, v5 ; SI-NEXT: s_waitcnt vmcnt(0) @@ -185482,35 +187517,65 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -186254,35 +188319,65 @@ define inreg <64 x half> @bitcast_v128i8_to_v64f16_scalar(<128 x i8> inreg %a, i ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -187095,7 +189190,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-LABEL: bitcast_v64f16_to_v128i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill @@ -187112,6 +189206,7 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:136 ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 @@ -187140,16 +189235,16 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:92 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v33, v4 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v3 -; SI-NEXT: v_cvt_f16_f32_e32 v33, v4 ; SI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 
4-byte Folded Spill ; SI-NEXT: v_cvt_f16_f32_e32 v31, v12 +; SI-NEXT: v_cvt_f16_f32_e32 v12, v16 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v6 -; SI-NEXT: v_cvt_f16_f32_e32 v12, v16 ; SI-NEXT: v_cvt_f16_f32_e32 v32, v8 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -187185,26 +189280,27 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v18 ; SI-NEXT: ; implicit-def: $vgpr18 +; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v17 -; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v10, v35 -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: v_cvt_f16_f32_e32 v8, v39 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v19 -; SI-NEXT: v_cvt_f16_f32_e32 v8, v39 ; SI-NEXT: ; implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr19 ; SI-NEXT: v_cvt_f16_f32_e32 v7, v51 +; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v22 -; SI-NEXT: v_cvt_f16_f32_e32 v56, v53 ; SI-NEXT: v_cvt_f16_f32_e32 v5, v55 +; SI-NEXT: v_cvt_f16_f32_e32 v56, v53 ; SI-NEXT: v_cvt_f16_f32_e32 v47, v54 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -187217,6 +189313,7 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: v_cvt_f16_f32_e32 v1, v23 ; SI-NEXT: v_cvt_f16_f32_e32 
v62, v60 ; SI-NEXT: v_cvt_f16_f32_e32 v61, v61 +; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v60, v45 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -187227,7 +189324,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 ; SI-NEXT: ; implicit-def: $vgpr53 -; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr22 ; SI-NEXT: ; implicit-def: $vgpr23 @@ -187285,7 +189381,6 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v63 -; SI-NEXT: s_waitcnt vmcnt(14) ; SI-NEXT: v_cvt_f16_f32_e32 v63, v46 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) @@ -191635,24 +193730,43 @@ define <128 x i8> @bitcast_v64f16_to_v128i8(<64 x half> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; 
GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -192355,6 +194469,42 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; SI-NEXT: v_writelane_b32 v63, s34, 0 +; SI-NEXT: v_writelane_b32 v63, s35, 1 +; SI-NEXT: v_writelane_b32 v63, s36, 2 +; SI-NEXT: v_writelane_b32 v63, s37, 3 +; SI-NEXT: v_writelane_b32 v63, s38, 4 +; SI-NEXT: v_writelane_b32 v63, s39, 5 +; SI-NEXT: v_writelane_b32 v63, s48, 6 +; SI-NEXT: 
v_writelane_b32 v63, s49, 7 +; SI-NEXT: v_writelane_b32 v63, s50, 8 +; SI-NEXT: v_writelane_b32 v63, s51, 9 +; SI-NEXT: v_writelane_b32 v63, s52, 10 +; SI-NEXT: v_writelane_b32 v63, s53, 11 +; SI-NEXT: v_writelane_b32 v63, s54, 12 +; SI-NEXT: v_writelane_b32 v63, s55, 13 +; SI-NEXT: v_writelane_b32 v63, s64, 14 +; SI-NEXT: v_writelane_b32 v63, s65, 15 +; SI-NEXT: v_writelane_b32 v63, s66, 16 +; SI-NEXT: v_writelane_b32 v63, s67, 17 +; SI-NEXT: v_writelane_b32 v63, s68, 18 +; SI-NEXT: v_writelane_b32 v63, s69, 19 +; SI-NEXT: v_writelane_b32 v63, s70, 20 +; SI-NEXT: v_writelane_b32 v63, s71, 21 +; SI-NEXT: v_writelane_b32 v63, s80, 22 +; SI-NEXT: v_writelane_b32 v63, s81, 23 +; SI-NEXT: v_writelane_b32 v63, s82, 24 +; SI-NEXT: v_writelane_b32 v63, s83, 25 +; SI-NEXT: v_writelane_b32 v63, s84, 26 +; SI-NEXT: v_writelane_b32 v63, s85, 27 +; SI-NEXT: v_writelane_b32 v63, s86, 28 +; SI-NEXT: v_writelane_b32 v63, s87, 29 +; SI-NEXT: v_writelane_b32 v63, s96, 30 +; SI-NEXT: v_writelane_b32 v63, s97, 31 +; SI-NEXT: v_writelane_b32 v63, s98, 32 +; SI-NEXT: v_writelane_b32 v63, s99, 33 +; SI-NEXT: v_writelane_b32 v63, s30, 34 +; SI-NEXT: v_writelane_b32 v63, s31, 35 ; SI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:80 ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 @@ -192380,92 +194530,68 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:64 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:72 -; SI-NEXT: v_writelane_b32 v63, s30, 0 -; SI-NEXT: v_writelane_b32 v63, s31, 1 -; SI-NEXT: v_writelane_b32 v63, s34, 2 -; SI-NEXT: v_writelane_b32 v63, s35, 3 -; SI-NEXT: v_writelane_b32 v63, s36, 4 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; SI-NEXT: v_writelane_b32 v63, s37, 5 -; SI-NEXT: v_writelane_b32 v63, s38, 6 -; SI-NEXT: v_writelane_b32 v63, s39, 7 -; 
SI-NEXT: v_writelane_b32 v63, s48, 8 +; SI-NEXT: v_mov_b32_e32 v46, v29 +; SI-NEXT: v_cvt_f16_f32_e32 v47, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v4 ; SI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v3 ; SI-NEXT: v_cvt_f16_f32_e32 v3, v7 -; SI-NEXT: v_writelane_b32 v63, s49, 9 -; SI-NEXT: v_writelane_b32 v63, s50, 10 -; SI-NEXT: v_writelane_b32 v63, s51, 11 -; SI-NEXT: v_writelane_b32 v63, s52, 12 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v9 -; SI-NEXT: v_writelane_b32 v63, s53, 13 -; SI-NEXT: v_writelane_b32 v63, s54, 14 -; SI-NEXT: v_writelane_b32 v63, s55, 15 -; SI-NEXT: v_writelane_b32 v63, s64, 16 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v26 -; SI-NEXT: v_writelane_b32 v63, s65, 17 -; SI-NEXT: v_writelane_b32 v63, s66, 18 -; SI-NEXT: v_writelane_b32 v63, s67, 19 -; SI-NEXT: v_writelane_b32 v63, s68, 20 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v25 -; SI-NEXT: v_writelane_b32 v63, s69, 21 -; SI-NEXT: v_writelane_b32 v63, s70, 22 -; SI-NEXT: v_writelane_b32 v63, s71, 23 -; SI-NEXT: v_writelane_b32 v63, s80, 24 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v30 -; SI-NEXT: v_writelane_b32 v63, s81, 25 -; SI-NEXT: v_writelane_b32 v63, s82, 26 -; SI-NEXT: v_writelane_b32 v63, s83, 27 -; SI-NEXT: v_writelane_b32 v63, s84, 28 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; SI-NEXT: v_writelane_b32 v63, s85, 29 -; SI-NEXT: v_writelane_b32 v63, s86, 30 -; SI-NEXT: v_writelane_b32 v63, s87, 31 -; SI-NEXT: 
v_mov_b32_e32 v46, v29 -; SI-NEXT: v_writelane_b32 v63, s96, 32 -; SI-NEXT: v_cvt_f16_f32_e32 v47, v2 -; SI-NEXT: v_cvt_f16_f32_e32 v2, v4 ; SI-NEXT: v_cvt_f16_f32_e32 v33, v6 ; SI-NEXT: v_cvt_f16_f32_e32 v43, v5 ; SI-NEXT: v_cvt_f16_f32_e32 v5, v8 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v9 ; SI-NEXT: v_cvt_f16_f32_e32 v32, v10 ; SI-NEXT: v_cvt_f16_f32_e32 v7, v12 ; SI-NEXT: v_cvt_f16_f32_e32 v29, v11 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v26 ; SI-NEXT: v_cvt_f16_f32_e32 v31, v14 ; SI-NEXT: v_cvt_f16_f32_e32 v6, v13 ; SI-NEXT: v_cvt_f16_f32_e32 v58, v16 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v25 ; SI-NEXT: v_cvt_f16_f32_e32 v13, v15 -; SI-NEXT: s_waitcnt vmcnt(14) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 ; SI-NEXT: v_cvt_f16_f32_e32 v10, v18 ; SI-NEXT: v_cvt_f16_f32_e32 v11, v17 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_cvt_f16_f32_e32 v3, v37 -; SI-NEXT: v_cvt_f16_f32_e32 v34, v20 +; SI-NEXT: v_cvt_f16_f32_e32 v3, v30 ; SI-NEXT: v_cvt_f16_f32_e32 v16, v19 ; SI-NEXT: v_cvt_f16_f32_e32 v9, v22 ; SI-NEXT: v_cvt_f16_f32_e32 v61, v21 +; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; SI-NEXT: v_cvt_f16_f32_e32 v21, v24 ; SI-NEXT: v_cvt_f16_f32_e32 v24, v23 ; SI-NEXT: v_cvt_f16_f32_e32 v44, v28 ; SI-NEXT: v_cvt_f16_f32_e32 v42, v27 ; SI-NEXT: v_cvt_f16_f32_e32 v46, v46 +; SI-NEXT: v_cvt_f16_f32_e32 v19, s17 +; SI-NEXT: v_cvt_f16_f32_e32 v18, s16 +; SI-NEXT: v_cvt_f16_f32_e32 v12, s19 +; SI-NEXT: v_cvt_f16_f32_e32 v17, s18 +; SI-NEXT: v_cvt_f16_f32_e32 v22, s22 +; SI-NEXT: v_cvt_f16_f32_e32 v15, s25 +; SI-NEXT: 
v_cvt_f16_f32_e32 v14, s24 +; SI-NEXT: v_cvt_f16_f32_e32 v30, s27 +; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane +; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v34 +; SI-NEXT: v_cvt_f16_f32_e32 v34, v20 ; SI-NEXT: v_cvt_f16_f32_e32 v8, v35 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cvt_f16_f32_e32 v3, v37 ; SI-NEXT: v_cvt_f16_f32_e32 v23, v36 +; SI-NEXT: v_cvt_f16_f32_e32 v27, v39 +; SI-NEXT: v_cvt_f16_f32_e32 v28, v48 ; SI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v3, v38 -; SI-NEXT: v_cvt_f16_f32_e32 v27, v39 -; SI-NEXT: v_cvt_f16_f32_e32 v28, v48 ; SI-NEXT: v_cvt_f16_f32_e32 v4, v49 ; SI-NEXT: v_cvt_f16_f32_e32 v45, v45 ; SI-NEXT: v_cvt_f16_f32_e32 v36, v56 @@ -192488,25 +194614,13 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v53, v40 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_cvt_f16_f32_e32 v55, v41 -; SI-NEXT: v_cvt_f16_f32_e32 v19, s17 -; SI-NEXT: v_cvt_f16_f32_e32 v18, s16 -; SI-NEXT: v_cvt_f16_f32_e32 v12, s19 -; SI-NEXT: v_cvt_f16_f32_e32 v17, s18 ; SI-NEXT: v_cvt_f16_f32_e32 v38, s21 ; SI-NEXT: v_cvt_f16_f32_e32 v37, s20 ; SI-NEXT: v_cvt_f16_f32_e32 v48, s23 -; SI-NEXT: v_cvt_f16_f32_e32 v22, s22 -; SI-NEXT: v_cvt_f16_f32_e32 v15, s25 -; SI-NEXT: v_cvt_f16_f32_e32 v14, s24 -; SI-NEXT: v_cvt_f16_f32_e32 v30, s27 ; SI-NEXT: v_cvt_f16_f32_e32 v39, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v35, s29 ; SI-NEXT: v_cvt_f16_f32_e32 v20, s28 -; SI-NEXT: v_writelane_b32 v63, s97, 33 -; SI-NEXT: v_writelane_b32 v63, s98, 34 -; SI-NEXT: v_writelane_b32 v63, s99, 35 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v17, off, 
s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -193994,6 +196108,7 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; SI-NEXT: v_readlane_b32 s30, v63, 34 ; SI-NEXT: v_readlane_b32 s45, v62, 17 ; SI-NEXT: v_readlane_b32 s43, v62, 23 ; SI-NEXT: v_readlane_b32 s41, v62, 29 @@ -194001,42 +196116,41 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; SI-NEXT: v_readlane_b32 s27, v62, 41 ; SI-NEXT: v_readlane_b32 s25, v62, 45 ; SI-NEXT: v_readlane_b32 s9, v62, 49 -; SI-NEXT: v_readlane_b32 s99, v63, 35 -; SI-NEXT: v_readlane_b32 s98, v63, 34 -; SI-NEXT: v_readlane_b32 s97, v63, 33 -; SI-NEXT: v_readlane_b32 s96, v63, 32 -; SI-NEXT: v_readlane_b32 s87, v63, 31 -; SI-NEXT: v_readlane_b32 s86, v63, 30 -; SI-NEXT: v_readlane_b32 s85, v63, 29 -; SI-NEXT: v_readlane_b32 s84, v63, 28 -; SI-NEXT: v_readlane_b32 s83, v63, 27 -; SI-NEXT: v_readlane_b32 s82, v63, 26 -; SI-NEXT: v_readlane_b32 s81, v63, 25 -; SI-NEXT: v_readlane_b32 s80, v63, 24 -; SI-NEXT: v_readlane_b32 s71, v63, 23 -; SI-NEXT: v_readlane_b32 s70, v63, 22 -; SI-NEXT: v_readlane_b32 s69, v63, 21 -; SI-NEXT: v_readlane_b32 s68, v63, 20 -; SI-NEXT: v_readlane_b32 s67, v63, 19 -; SI-NEXT: v_readlane_b32 s66, v63, 18 -; SI-NEXT: v_readlane_b32 s65, v63, 17 -; SI-NEXT: v_readlane_b32 s64, v63, 16 -; SI-NEXT: v_readlane_b32 s55, v63, 15 -; SI-NEXT: v_readlane_b32 s54, v63, 14 -; SI-NEXT: v_readlane_b32 s53, v63, 13 -; SI-NEXT: v_readlane_b32 s52, v63, 12 -; SI-NEXT: v_readlane_b32 s51, v63, 11 -; SI-NEXT: v_readlane_b32 s50, v63, 10 -; SI-NEXT: v_readlane_b32 s49, v63, 9 -; SI-NEXT: v_readlane_b32 s48, v63, 8 -; SI-NEXT: v_readlane_b32 s39, v63, 7 -; SI-NEXT: v_readlane_b32 s38, v63, 6 -; SI-NEXT: 
v_readlane_b32 s37, v63, 5 -; SI-NEXT: v_readlane_b32 s36, v63, 4 -; SI-NEXT: v_readlane_b32 s35, v63, 3 -; SI-NEXT: v_readlane_b32 s34, v63, 2 -; SI-NEXT: v_readlane_b32 s31, v63, 1 -; SI-NEXT: v_readlane_b32 s30, v63, 0 +; SI-NEXT: v_readlane_b32 s31, v63, 35 +; SI-NEXT: v_readlane_b32 s99, v63, 33 +; SI-NEXT: v_readlane_b32 s98, v63, 32 +; SI-NEXT: v_readlane_b32 s97, v63, 31 +; SI-NEXT: v_readlane_b32 s96, v63, 30 +; SI-NEXT: v_readlane_b32 s87, v63, 29 +; SI-NEXT: v_readlane_b32 s86, v63, 28 +; SI-NEXT: v_readlane_b32 s85, v63, 27 +; SI-NEXT: v_readlane_b32 s84, v63, 26 +; SI-NEXT: v_readlane_b32 s83, v63, 25 +; SI-NEXT: v_readlane_b32 s82, v63, 24 +; SI-NEXT: v_readlane_b32 s81, v63, 23 +; SI-NEXT: v_readlane_b32 s80, v63, 22 +; SI-NEXT: v_readlane_b32 s71, v63, 21 +; SI-NEXT: v_readlane_b32 s70, v63, 20 +; SI-NEXT: v_readlane_b32 s69, v63, 19 +; SI-NEXT: v_readlane_b32 s68, v63, 18 +; SI-NEXT: v_readlane_b32 s67, v63, 17 +; SI-NEXT: v_readlane_b32 s66, v63, 16 +; SI-NEXT: v_readlane_b32 s65, v63, 15 +; SI-NEXT: v_readlane_b32 s64, v63, 14 +; SI-NEXT: v_readlane_b32 s55, v63, 13 +; SI-NEXT: v_readlane_b32 s54, v63, 12 +; SI-NEXT: v_readlane_b32 s53, v63, 11 +; SI-NEXT: v_readlane_b32 s52, v63, 10 +; SI-NEXT: v_readlane_b32 s51, v63, 9 +; SI-NEXT: v_readlane_b32 s50, v63, 8 +; SI-NEXT: v_readlane_b32 s49, v63, 7 +; SI-NEXT: v_readlane_b32 s48, v63, 6 +; SI-NEXT: v_readlane_b32 s39, v63, 5 +; SI-NEXT: v_readlane_b32 s38, v63, 4 +; SI-NEXT: v_readlane_b32 s37, v63, 3 +; SI-NEXT: v_readlane_b32 s36, v63, 2 +; SI-NEXT: v_readlane_b32 s35, v63, 1 +; SI-NEXT: v_readlane_b32 s34, v63, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload @@ -194051,39 +196165,53 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:340 ; 
4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_writelane_b32 v63, s68, 20 -; VI-NEXT: v_writelane_b32 v63, s69, 21 -; VI-NEXT: v_writelane_b32 v63, s70, 22 -; VI-NEXT: v_writelane_b32 v63, s71, 23 -; VI-NEXT: v_writelane_b32 v63, s80, 24 -; VI-NEXT: v_writelane_b32 v63, s81, 25 -; VI-NEXT: v_writelane_b32 v63, s82, 26 -; VI-NEXT: v_writelane_b32 v63, s83, 27 -; VI-NEXT: v_writelane_b32 v63, s84, 28 -; VI-NEXT: v_writelane_b32 v63, s85, 29 -; VI-NEXT: v_writelane_b32 v63, s86, 30 +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; 
VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s68, 18 +; VI-NEXT: v_writelane_b32 v63, s69, 19 +; VI-NEXT: v_writelane_b32 v63, s70, 20 +; VI-NEXT: v_writelane_b32 v63, s71, 21 +; VI-NEXT: v_writelane_b32 v63, s80, 22 +; VI-NEXT: v_writelane_b32 v63, s81, 23 +; VI-NEXT: v_writelane_b32 v63, s82, 24 +; VI-NEXT: v_writelane_b32 v63, s83, 25 +; VI-NEXT: v_writelane_b32 v63, s84, 26 +; VI-NEXT: v_writelane_b32 v63, s85, 27 +; VI-NEXT: v_writelane_b32 v63, s86, 28 +; VI-NEXT: v_writelane_b32 v63, s87, 29 +; VI-NEXT: v_writelane_b32 v63, s30, 30 +; VI-NEXT: v_writelane_b32 v63, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v63, s87, 31 ; VI-NEXT: v_readfirstlane_b32 s44, 
v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 ; VI-NEXT: v_readfirstlane_b32 s42, v5 @@ -194103,20 +196231,6 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB95_3 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -194998,38 +197112,38 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; VI-NEXT: v_lshlrev_b32_e32 v18, 8, v18 ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; VI-NEXT: v_or_b32_sdwa v1, v61, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s87, v63, 31 -; VI-NEXT: v_readlane_b32 s86, v63, 30 -; VI-NEXT: 
v_readlane_b32 s85, v63, 29 -; VI-NEXT: v_readlane_b32 s84, v63, 28 -; VI-NEXT: v_readlane_b32 s83, v63, 27 -; VI-NEXT: v_readlane_b32 s82, v63, 26 -; VI-NEXT: v_readlane_b32 s81, v63, 25 -; VI-NEXT: v_readlane_b32 s80, v63, 24 -; VI-NEXT: v_readlane_b32 s71, v63, 23 -; VI-NEXT: v_readlane_b32 s70, v63, 22 -; VI-NEXT: v_readlane_b32 s69, v63, 21 -; VI-NEXT: v_readlane_b32 s68, v63, 20 -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 30 +; VI-NEXT: v_readlane_b32 s31, v63, 31 +; VI-NEXT: v_readlane_b32 s87, v63, 29 +; VI-NEXT: v_readlane_b32 s86, v63, 28 +; VI-NEXT: v_readlane_b32 s85, v63, 27 +; VI-NEXT: v_readlane_b32 s84, v63, 26 +; VI-NEXT: v_readlane_b32 s83, v63, 25 +; VI-NEXT: v_readlane_b32 s82, v63, 24 +; VI-NEXT: v_readlane_b32 s81, v63, 23 +; VI-NEXT: v_readlane_b32 s80, v63, 22 +; VI-NEXT: v_readlane_b32 s71, v63, 21 +; VI-NEXT: v_readlane_b32 s70, v63, 20 +; VI-NEXT: v_readlane_b32 s69, v63, 19 +; VI-NEXT: v_readlane_b32 s68, v63, 18 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: 
v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_sdwa v58, v23, v58 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload @@ -195410,43 +197524,57 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: 
v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 
6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 +; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v3 ; GFX9-NEXT: v_readfirstlane_b32 s45, v4 ; GFX9-NEXT: v_readfirstlane_b32 s42, v5 @@ -195466,20 +197594,6 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], 
s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB95_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -196329,42 +198443,42 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX9-NEXT: v_or_b32_sdwa v25, v25, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v30, 8, v44 ; GFX9-NEXT: v_or_b32_sdwa v26, v26, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: 
v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; 
GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v15 ; GFX9-NEXT: v_or_b32_sdwa v15, v38, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -196717,90 +198831,111 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s13, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; 
GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s7, v12 -; GFX11-NEXT: v_readfirstlane_b32 s4, v13 -; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 s99, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 +; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, 
v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s12, v5 +; GFX11-NEXT: v_readfirstlane_b32 s13, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s6, v11 +; GFX11-NEXT: v_readfirstlane_b32 s7, v12 +; GFX11-NEXT: v_readfirstlane_b32 s4, v13 +; GFX11-NEXT: v_readfirstlane_b32 s5, v14 +; GFX11-NEXT: s_mov_b32 s99, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; 
GFX11-NEXT: s_cbranch_scc0 .LBB95_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 16 @@ -197689,47 +199824,47 @@ define inreg <128 x i8> @bitcast_v64f16_to_v128i8_scalar(<64 x half> inreg %a, i ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 -; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, 
v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 
offset:76 @@ -203149,53 +205284,99 @@ define <64 x i16> @bitcast_v128i8_to_v64i16(<128 x i8> %a, i32 %b) { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1f ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:580 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:576 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:572 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:568 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:564 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:560 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:556 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:552 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:548 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:544 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:540 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:536 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:532 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:528 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:524 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:520 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:516 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:512 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:508 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:504 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:500 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:496 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:492 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:488 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:484 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:480 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:476 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:472 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:468 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:464 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:460 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:456 ; GFX11-FAKE16-NEXT: s_clause 0xf ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:452 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:448 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:444 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; 
GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:392 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v37, v30 :: v_dual_mov_b32 v54, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v55, v28 :: v_dual_mov_b32 v52, v26 @@ -204088,6 +206269,43 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v41, s34, 0 +; SI-NEXT: v_writelane_b32 v41, s35, 1 +; SI-NEXT: v_writelane_b32 v41, s36, 2 +; SI-NEXT: v_writelane_b32 v41, s37, 3 +; SI-NEXT: v_writelane_b32 v41, s38, 4 +; SI-NEXT: v_writelane_b32 v41, s39, 5 +; SI-NEXT: v_writelane_b32 v41, s48, 6 +; 
SI-NEXT: v_writelane_b32 v41, s49, 7 +; SI-NEXT: v_writelane_b32 v41, s50, 8 +; SI-NEXT: v_writelane_b32 v41, s51, 9 +; SI-NEXT: v_writelane_b32 v41, s52, 10 +; SI-NEXT: v_writelane_b32 v41, s53, 11 +; SI-NEXT: v_writelane_b32 v41, s54, 12 +; SI-NEXT: v_writelane_b32 v41, s55, 13 +; SI-NEXT: v_writelane_b32 v41, s64, 14 +; SI-NEXT: v_writelane_b32 v41, s65, 15 +; SI-NEXT: v_writelane_b32 v41, s66, 16 +; SI-NEXT: v_writelane_b32 v41, s67, 17 +; SI-NEXT: v_writelane_b32 v41, s68, 18 +; SI-NEXT: v_writelane_b32 v41, s69, 19 +; SI-NEXT: v_writelane_b32 v41, s70, 20 +; SI-NEXT: v_writelane_b32 v41, s71, 21 +; SI-NEXT: v_writelane_b32 v41, s80, 22 +; SI-NEXT: v_writelane_b32 v41, s81, 23 +; SI-NEXT: v_writelane_b32 v41, s82, 24 +; SI-NEXT: v_writelane_b32 v41, s83, 25 +; SI-NEXT: v_writelane_b32 v41, s84, 26 +; SI-NEXT: v_writelane_b32 v41, s85, 27 +; SI-NEXT: v_writelane_b32 v41, s86, 28 +; SI-NEXT: v_writelane_b32 v41, s87, 29 +; SI-NEXT: v_writelane_b32 v41, s96, 30 +; SI-NEXT: v_writelane_b32 v41, s97, 31 +; SI-NEXT: v_writelane_b32 v41, s98, 32 +; SI-NEXT: v_writelane_b32 v41, s99, 33 +; SI-NEXT: v_writelane_b32 v41, s30, 34 +; SI-NEXT: v_writelane_b32 v41, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:328 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:324 @@ -204097,8 +206315,7 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:308 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:304 ; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_writelane_b32 v41, s30, 0 +; SI-NEXT: v_readfirstlane_b32 s39, v26 ; SI-NEXT: s_waitcnt expcnt(1) ; SI-NEXT: v_writelane_b32 v43, s29, 0 ; SI-NEXT: v_writelane_b32 v43, s28, 1 @@ -204114,41 +206331,6 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; 
SI-NEXT: v_writelane_b32 v43, s18, 11 ; SI-NEXT: v_writelane_b32 v43, s17, 12 ; SI-NEXT: v_writelane_b32 v43, s16, 13 -; SI-NEXT: v_writelane_b32 v41, s31, 1 -; SI-NEXT: v_writelane_b32 v41, s34, 2 -; SI-NEXT: v_writelane_b32 v41, s35, 3 -; SI-NEXT: v_writelane_b32 v41, s36, 4 -; SI-NEXT: v_writelane_b32 v41, s37, 5 -; SI-NEXT: v_writelane_b32 v41, s38, 6 -; SI-NEXT: v_writelane_b32 v41, s39, 7 -; SI-NEXT: v_writelane_b32 v41, s48, 8 -; SI-NEXT: v_writelane_b32 v41, s49, 9 -; SI-NEXT: v_writelane_b32 v41, s50, 10 -; SI-NEXT: v_writelane_b32 v41, s51, 11 -; SI-NEXT: v_writelane_b32 v41, s52, 12 -; SI-NEXT: v_writelane_b32 v41, s53, 13 -; SI-NEXT: v_writelane_b32 v41, s54, 14 -; SI-NEXT: v_writelane_b32 v41, s55, 15 -; SI-NEXT: v_writelane_b32 v41, s64, 16 -; SI-NEXT: v_writelane_b32 v41, s65, 17 -; SI-NEXT: v_writelane_b32 v41, s66, 18 -; SI-NEXT: v_writelane_b32 v41, s67, 19 -; SI-NEXT: v_writelane_b32 v41, s68, 20 -; SI-NEXT: v_writelane_b32 v41, s69, 21 -; SI-NEXT: v_writelane_b32 v41, s70, 22 -; SI-NEXT: v_writelane_b32 v41, s71, 23 -; SI-NEXT: v_writelane_b32 v41, s80, 24 -; SI-NEXT: v_writelane_b32 v41, s81, 25 -; SI-NEXT: v_writelane_b32 v41, s82, 26 -; SI-NEXT: v_writelane_b32 v41, s83, 27 -; SI-NEXT: v_writelane_b32 v41, s84, 28 -; SI-NEXT: v_writelane_b32 v41, s85, 29 -; SI-NEXT: v_writelane_b32 v41, s86, 30 -; SI-NEXT: v_writelane_b32 v41, s87, 31 -; SI-NEXT: v_writelane_b32 v41, s96, 32 -; SI-NEXT: v_writelane_b32 v41, s97, 33 -; SI-NEXT: v_writelane_b32 v41, s98, 34 -; SI-NEXT: v_readfirstlane_b32 s39, v26 ; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s47, v12 ; SI-NEXT: v_writelane_b32 v42, s39, 0 @@ -204172,6 +206354,18 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s11, v1 ; SI-NEXT: v_readfirstlane_b32 s12, v2 ; SI-NEXT: v_readfirstlane_b32 s13, v9 +; SI-NEXT: v_readfirstlane_b32 s14, v10 +; SI-NEXT: v_readfirstlane_b32 s15, v8 +; 
SI-NEXT: v_readfirstlane_b32 s18, v7 +; SI-NEXT: v_readfirstlane_b32 s21, v5 +; SI-NEXT: v_readfirstlane_b32 s22, v6 +; SI-NEXT: v_readfirstlane_b32 s40, v17 +; SI-NEXT: v_readfirstlane_b32 s41, v18 +; SI-NEXT: v_readfirstlane_b32 s42, v4 +; SI-NEXT: v_readfirstlane_b32 s43, v3 +; SI-NEXT: v_readfirstlane_b32 s76, v16 +; SI-NEXT: v_readfirstlane_b32 s77, v15 +; SI-NEXT: v_readfirstlane_b32 s38, v25 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 14 @@ -204205,19 +206399,6 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:256 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s6, v38 -; SI-NEXT: v_readfirstlane_b32 s14, v10 -; SI-NEXT: v_readfirstlane_b32 s15, v8 -; SI-NEXT: v_readfirstlane_b32 s18, v7 -; SI-NEXT: v_readfirstlane_b32 s21, v5 -; SI-NEXT: v_readfirstlane_b32 s22, v6 -; SI-NEXT: v_readfirstlane_b32 s40, v17 -; SI-NEXT: v_readfirstlane_b32 s41, v18 -; SI-NEXT: v_readfirstlane_b32 s42, v4 -; SI-NEXT: v_readfirstlane_b32 s43, v3 -; SI-NEXT: v_readfirstlane_b32 s76, v16 -; SI-NEXT: v_readfirstlane_b32 s77, v15 -; SI-NEXT: v_readfirstlane_b32 s38, v25 -; SI-NEXT: v_writelane_b32 v41, s99, 35 ; SI-NEXT: s_waitcnt vmcnt(11) ; SI-NEXT: v_readfirstlane_b32 s4, v31 ; SI-NEXT: v_writelane_b32 v43, s4, 19 @@ -205721,42 +207902,42 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; SI-NEXT: v_mov_b32_e32 v1, s4 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; SI-NEXT: v_readlane_b32 s99, v41, 35 -; SI-NEXT: v_readlane_b32 s98, v41, 34 -; SI-NEXT: v_readlane_b32 s97, v41, 33 -; SI-NEXT: v_readlane_b32 s96, v41, 32 -; SI-NEXT: v_readlane_b32 s87, v41, 31 -; SI-NEXT: v_readlane_b32 s86, v41, 30 -; SI-NEXT: v_readlane_b32 s85, v41, 29 -; SI-NEXT: v_readlane_b32 s84, v41, 28 -; SI-NEXT: 
v_readlane_b32 s83, v41, 27 -; SI-NEXT: v_readlane_b32 s82, v41, 26 -; SI-NEXT: v_readlane_b32 s81, v41, 25 -; SI-NEXT: v_readlane_b32 s80, v41, 24 -; SI-NEXT: v_readlane_b32 s71, v41, 23 -; SI-NEXT: v_readlane_b32 s70, v41, 22 -; SI-NEXT: v_readlane_b32 s69, v41, 21 -; SI-NEXT: v_readlane_b32 s68, v41, 20 -; SI-NEXT: v_readlane_b32 s67, v41, 19 -; SI-NEXT: v_readlane_b32 s66, v41, 18 -; SI-NEXT: v_readlane_b32 s65, v41, 17 -; SI-NEXT: v_readlane_b32 s64, v41, 16 -; SI-NEXT: v_readlane_b32 s55, v41, 15 -; SI-NEXT: v_readlane_b32 s54, v41, 14 -; SI-NEXT: v_readlane_b32 s53, v41, 13 -; SI-NEXT: v_readlane_b32 s52, v41, 12 -; SI-NEXT: v_readlane_b32 s51, v41, 11 -; SI-NEXT: v_readlane_b32 s50, v41, 10 -; SI-NEXT: v_readlane_b32 s49, v41, 9 -; SI-NEXT: v_readlane_b32 s48, v41, 8 -; SI-NEXT: v_readlane_b32 s39, v41, 7 -; SI-NEXT: v_readlane_b32 s38, v41, 6 -; SI-NEXT: v_readlane_b32 s37, v41, 5 -; SI-NEXT: v_readlane_b32 s36, v41, 4 -; SI-NEXT: v_readlane_b32 s35, v41, 3 -; SI-NEXT: v_readlane_b32 s34, v41, 2 -; SI-NEXT: v_readlane_b32 s31, v41, 1 -; SI-NEXT: v_readlane_b32 s30, v41, 0 +; SI-NEXT: v_readlane_b32 s30, v41, 34 +; SI-NEXT: v_readlane_b32 s31, v41, 35 +; SI-NEXT: v_readlane_b32 s99, v41, 33 +; SI-NEXT: v_readlane_b32 s98, v41, 32 +; SI-NEXT: v_readlane_b32 s97, v41, 31 +; SI-NEXT: v_readlane_b32 s96, v41, 30 +; SI-NEXT: v_readlane_b32 s87, v41, 29 +; SI-NEXT: v_readlane_b32 s86, v41, 28 +; SI-NEXT: v_readlane_b32 s85, v41, 27 +; SI-NEXT: v_readlane_b32 s84, v41, 26 +; SI-NEXT: v_readlane_b32 s83, v41, 25 +; SI-NEXT: v_readlane_b32 s82, v41, 24 +; SI-NEXT: v_readlane_b32 s81, v41, 23 +; SI-NEXT: v_readlane_b32 s80, v41, 22 +; SI-NEXT: v_readlane_b32 s71, v41, 21 +; SI-NEXT: v_readlane_b32 s70, v41, 20 +; SI-NEXT: v_readlane_b32 s69, v41, 19 +; SI-NEXT: v_readlane_b32 s68, v41, 18 +; SI-NEXT: v_readlane_b32 s67, v41, 17 +; SI-NEXT: v_readlane_b32 s66, v41, 16 +; SI-NEXT: v_readlane_b32 s65, v41, 15 +; SI-NEXT: v_readlane_b32 s64, v41, 14 +; SI-NEXT: 
v_readlane_b32 s55, v41, 13 +; SI-NEXT: v_readlane_b32 s54, v41, 12 +; SI-NEXT: v_readlane_b32 s53, v41, 11 +; SI-NEXT: v_readlane_b32 s52, v41, 10 +; SI-NEXT: v_readlane_b32 s51, v41, 9 +; SI-NEXT: v_readlane_b32 s50, v41, 8 +; SI-NEXT: v_readlane_b32 s49, v41, 7 +; SI-NEXT: v_readlane_b32 s48, v41, 6 +; SI-NEXT: v_readlane_b32 s39, v41, 5 +; SI-NEXT: v_readlane_b32 s38, v41, 4 +; SI-NEXT: v_readlane_b32 s37, v41, 3 +; SI-NEXT: v_readlane_b32 s36, v41, 2 +; SI-NEXT: v_readlane_b32 s35, v41, 1 +; SI-NEXT: v_readlane_b32 s34, v41, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload @@ -207905,35 +210086,65 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1e ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -208677,35 +210888,65 @@ define inreg <64 x i16> @bitcast_v128i8_to_v64i16_scalar(<128 x i8> inreg %a, i3 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1e ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:440 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:436 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:432 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:428 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:424 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:420 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:416 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:412 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:408 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:404 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:400 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:396 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:392 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:388 +; GFX11-FAKE16-NEXT: ; meta instruction 
; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:384 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:380 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:376 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:372 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:368 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:364 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:360 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:356 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:352 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:348 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:344 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:340 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:336 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:332 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:328 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:324 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:320 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v54, v30 :: v_dual_mov_b32 v51, v24 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, v28 :: v_dual_mov_b32 v55, v26 @@ -214217,24 +216458,43 @@ define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) { ; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x13 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:88 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:84 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:80 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:76 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:72 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:68 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:64 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:60 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:56 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:52 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:48 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:44 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:40 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:36 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:32 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:28 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:24 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:20 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v74, s32 offset:16 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:12 ; GFX11-FAKE16-NEXT: s_clause 0x2 ; GFX11-FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8 @@ -214925,6 +217185,43 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: s_waitcnt expcnt(3) +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s96, 30 +; SI-NEXT: v_writelane_b32 v40, s97, 31 +; SI-NEXT: v_writelane_b32 v40, s98, 32 +; SI-NEXT: v_writelane_b32 v40, s99, 33 +; SI-NEXT: v_writelane_b32 v40, 
s30, 34 +; SI-NEXT: v_writelane_b32 v40, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68 @@ -214933,36 +217230,7 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:52 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:48 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 ; SI-NEXT: s_mov_b32 s88, s17 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 ; SI-NEXT: v_readfirstlane_b32 s6, v16 ; SI-NEXT: ; implicit-def: $vgpr41 : SGPR spill to VGPR lane ; SI-NEXT: v_readfirstlane_b32 s7, v15 @@ -214988,14 +217256,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: 
v_readfirstlane_b32 s16, v27 ; SI-NEXT: v_writelane_b32 v41, s14, 9 ; SI-NEXT: v_writelane_b32 v41, s16, 10 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 -; SI-NEXT: v_writelane_b32 v40, s86, 30 -; SI-NEXT: v_writelane_b32 v40, s87, 31 -; SI-NEXT: v_writelane_b32 v40, s96, 32 -; SI-NEXT: v_writelane_b32 v40, s97, 33 -; SI-NEXT: v_writelane_b32 v40, s98, 34 -; SI-NEXT: v_writelane_b32 v40, s99, 35 ; SI-NEXT: v_readfirstlane_b32 s98, v30 ; SI-NEXT: v_readfirstlane_b32 s97, v26 ; SI-NEXT: v_readfirstlane_b32 s96, v22 @@ -215008,6 +217268,15 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s85, v10 ; SI-NEXT: v_readfirstlane_b32 s51, v9 ; SI-NEXT: v_readfirstlane_b32 s53, v8 +; SI-NEXT: v_readfirstlane_b32 s65, v7 +; SI-NEXT: v_readfirstlane_b32 s84, v6 +; SI-NEXT: v_readfirstlane_b32 s31, v5 +; SI-NEXT: v_readfirstlane_b32 s37, v4 +; SI-NEXT: v_readfirstlane_b32 s49, v3 +; SI-NEXT: v_readfirstlane_b32 s78, v2 +; SI-NEXT: v_readfirstlane_b32 s39, v1 +; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane +; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s89, v31 ; SI-NEXT: s_waitcnt vmcnt(6) @@ -215037,15 +217306,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s83, v38 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:80 -; SI-NEXT: v_readfirstlane_b32 s65, v7 -; SI-NEXT: v_readfirstlane_b32 s84, v6 -; SI-NEXT: v_readfirstlane_b32 s31, v5 -; SI-NEXT: v_readfirstlane_b32 s37, v4 -; SI-NEXT: v_readfirstlane_b32 s49, v3 -; SI-NEXT: v_readfirstlane_b32 s78, v2 -; SI-NEXT: v_readfirstlane_b32 s39, v1 -; SI-NEXT: ; implicit-def: $vgpr43 : SGPR spill to VGPR lane -; SI-NEXT: ; implicit-def: $vgpr42 : SGPR spill to VGPR lane ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s77, 
v31 ; SI-NEXT: s_waitcnt vmcnt(11) @@ -216310,6 +218570,7 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v40, 34 ; SI-NEXT: v_readlane_b32 s21, v41, 33 ; SI-NEXT: v_readlane_b32 s19, v41, 51 ; SI-NEXT: v_readlane_b32 s17, v41, 57 @@ -216318,42 +218579,41 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; SI-NEXT: v_readlane_b32 s11, v43, 11 ; SI-NEXT: v_readlane_b32 s9, v43, 17 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v40, 35 -; SI-NEXT: v_readlane_b32 s98, v40, 34 -; SI-NEXT: v_readlane_b32 s97, v40, 33 -; SI-NEXT: v_readlane_b32 s96, v40, 32 -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: 
v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 35 +; SI-NEXT: v_readlane_b32 s99, v40, 33 +; SI-NEXT: v_readlane_b32 s98, v40, 32 +; SI-NEXT: v_readlane_b32 s97, v40, 31 +; SI-NEXT: v_readlane_b32 s96, v40, 30 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -216595,38 +218855,39 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, 
s[4:5] -; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_writelane_b32 v20, s31, 1 -; VI-NEXT: v_writelane_b32 v20, s34, 2 -; VI-NEXT: v_writelane_b32 v20, s35, 3 -; VI-NEXT: v_writelane_b32 v20, s36, 4 -; VI-NEXT: v_writelane_b32 v20, s37, 5 -; VI-NEXT: v_writelane_b32 v20, s38, 6 -; VI-NEXT: v_writelane_b32 v20, s39, 7 -; VI-NEXT: v_writelane_b32 v20, s48, 8 -; VI-NEXT: v_writelane_b32 v20, s49, 9 -; VI-NEXT: v_writelane_b32 v20, s50, 10 -; VI-NEXT: v_writelane_b32 v20, s51, 11 -; VI-NEXT: v_writelane_b32 v20, s52, 12 -; VI-NEXT: v_writelane_b32 v20, s53, 13 -; VI-NEXT: v_writelane_b32 v20, s54, 14 -; VI-NEXT: v_writelane_b32 v20, s55, 15 -; VI-NEXT: v_writelane_b32 v20, s64, 16 -; VI-NEXT: v_writelane_b32 v20, s65, 17 -; VI-NEXT: v_writelane_b32 v20, s66, 18 -; VI-NEXT: v_writelane_b32 v20, s67, 19 -; VI-NEXT: v_writelane_b32 v20, s68, 20 -; VI-NEXT: v_writelane_b32 v20, s69, 21 -; VI-NEXT: v_writelane_b32 v20, s70, 22 -; VI-NEXT: v_writelane_b32 v20, s71, 23 -; VI-NEXT: v_writelane_b32 v20, s80, 24 -; VI-NEXT: v_writelane_b32 v20, s81, 25 -; VI-NEXT: v_writelane_b32 v20, s82, 26 -; VI-NEXT: v_writelane_b32 v20, s83, 27 -; VI-NEXT: v_writelane_b32 v20, s84, 28 -; VI-NEXT: v_writelane_b32 v20, s85, 29 +; VI-NEXT: v_writelane_b32 v20, s34, 0 +; VI-NEXT: v_writelane_b32 v20, s35, 1 +; VI-NEXT: v_writelane_b32 v20, s36, 2 +; VI-NEXT: v_writelane_b32 v20, s37, 3 +; VI-NEXT: v_writelane_b32 v20, s38, 4 +; VI-NEXT: v_writelane_b32 v20, s39, 5 +; VI-NEXT: v_writelane_b32 v20, s48, 6 +; VI-NEXT: v_writelane_b32 v20, s49, 7 +; VI-NEXT: v_writelane_b32 v20, s50, 8 +; VI-NEXT: v_writelane_b32 v20, s51, 9 +; VI-NEXT: v_writelane_b32 v20, s52, 10 +; VI-NEXT: v_writelane_b32 v20, s53, 11 +; VI-NEXT: v_writelane_b32 v20, s54, 12 +; VI-NEXT: v_writelane_b32 v20, s55, 13 +; VI-NEXT: v_writelane_b32 v20, s64, 14 +; VI-NEXT: v_writelane_b32 v20, s65, 15 +; VI-NEXT: v_writelane_b32 v20, s66, 16 +; VI-NEXT: v_writelane_b32 v20, s67, 17 +; VI-NEXT: v_writelane_b32 v20, s68, 18 +; 
VI-NEXT: v_writelane_b32 v20, s69, 19 +; VI-NEXT: v_writelane_b32 v20, s70, 20 +; VI-NEXT: v_writelane_b32 v20, s71, 21 +; VI-NEXT: v_writelane_b32 v20, s80, 22 +; VI-NEXT: v_writelane_b32 v20, s81, 23 +; VI-NEXT: v_writelane_b32 v20, s82, 24 +; VI-NEXT: v_writelane_b32 v20, s83, 25 +; VI-NEXT: v_writelane_b32 v20, s84, 26 +; VI-NEXT: v_writelane_b32 v20, s85, 27 +; VI-NEXT: v_writelane_b32 v20, s86, 28 +; VI-NEXT: v_writelane_b32 v20, s87, 29 +; VI-NEXT: v_writelane_b32 v20, s30, 30 +; VI-NEXT: v_writelane_b32 v20, s31, 31 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; VI-NEXT: v_writelane_b32 v20, s86, 30 ; VI-NEXT: v_readfirstlane_b32 s42, v3 ; VI-NEXT: v_readfirstlane_b32 s43, v4 ; VI-NEXT: v_readfirstlane_b32 s40, v5 @@ -216646,7 +218907,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: s_and_b64 s[46:47], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s45, v2 -; VI-NEXT: v_writelane_b32 v20, s87, 31 ; VI-NEXT: ; implicit-def: $vgpr21 : SGPR spill to VGPR lane ; VI-NEXT: s_cbranch_scc0 .LBB99_4 ; VI-NEXT: ; %bb.1: ; %cmp.false @@ -217561,39 +219821,39 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v20, 30 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s87, v20, 31 -; VI-NEXT: v_readlane_b32 s86, v20, 30 -; VI-NEXT: v_readlane_b32 s85, v20, 29 -; VI-NEXT: v_readlane_b32 s84, v20, 28 -; VI-NEXT: v_readlane_b32 s83, v20, 27 -; VI-NEXT: v_readlane_b32 s82, v20, 26 -; VI-NEXT: v_readlane_b32 s81, v20, 25 -; VI-NEXT: v_readlane_b32 s80, v20, 24 -; VI-NEXT: v_readlane_b32 s71, v20, 23 -; VI-NEXT: v_readlane_b32 s70, v20, 22 -; VI-NEXT: v_readlane_b32 s69, v20, 21 -; VI-NEXT: v_readlane_b32 s68, v20, 20 -; VI-NEXT: v_readlane_b32 s67, v20, 19 -; VI-NEXT: 
v_readlane_b32 s66, v20, 18 -; VI-NEXT: v_readlane_b32 s65, v20, 17 -; VI-NEXT: v_readlane_b32 s64, v20, 16 -; VI-NEXT: v_readlane_b32 s55, v20, 15 -; VI-NEXT: v_readlane_b32 s54, v20, 14 -; VI-NEXT: v_readlane_b32 s53, v20, 13 -; VI-NEXT: v_readlane_b32 s52, v20, 12 -; VI-NEXT: v_readlane_b32 s51, v20, 11 -; VI-NEXT: v_readlane_b32 s50, v20, 10 -; VI-NEXT: v_readlane_b32 s49, v20, 9 -; VI-NEXT: v_readlane_b32 s48, v20, 8 -; VI-NEXT: v_readlane_b32 s39, v20, 7 -; VI-NEXT: v_readlane_b32 s38, v20, 6 -; VI-NEXT: v_readlane_b32 s37, v20, 5 -; VI-NEXT: v_readlane_b32 s36, v20, 4 -; VI-NEXT: v_readlane_b32 s35, v20, 3 -; VI-NEXT: v_readlane_b32 s34, v20, 2 -; VI-NEXT: v_readlane_b32 s31, v20, 1 -; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 31 +; VI-NEXT: v_readlane_b32 s87, v20, 29 +; VI-NEXT: v_readlane_b32 s86, v20, 28 +; VI-NEXT: v_readlane_b32 s85, v20, 27 +; VI-NEXT: v_readlane_b32 s84, v20, 26 +; VI-NEXT: v_readlane_b32 s83, v20, 25 +; VI-NEXT: v_readlane_b32 s82, v20, 24 +; VI-NEXT: v_readlane_b32 s81, v20, 23 +; VI-NEXT: v_readlane_b32 s80, v20, 22 +; VI-NEXT: v_readlane_b32 s71, v20, 21 +; VI-NEXT: v_readlane_b32 s70, v20, 20 +; VI-NEXT: v_readlane_b32 s69, v20, 19 +; VI-NEXT: v_readlane_b32 s68, v20, 18 +; VI-NEXT: v_readlane_b32 s67, v20, 17 +; VI-NEXT: v_readlane_b32 s66, v20, 16 +; VI-NEXT: v_readlane_b32 s65, v20, 15 +; VI-NEXT: v_readlane_b32 s64, v20, 14 +; VI-NEXT: v_readlane_b32 s55, v20, 13 +; VI-NEXT: v_readlane_b32 s54, v20, 12 +; VI-NEXT: v_readlane_b32 s53, v20, 11 +; VI-NEXT: v_readlane_b32 s52, v20, 10 +; VI-NEXT: v_readlane_b32 s51, v20, 9 +; VI-NEXT: v_readlane_b32 s50, v20, 8 +; VI-NEXT: v_readlane_b32 s49, v20, 7 +; VI-NEXT: v_readlane_b32 s48, v20, 6 +; VI-NEXT: v_readlane_b32 s39, v20, 5 +; VI-NEXT: v_readlane_b32 s38, v20, 4 +; VI-NEXT: v_readlane_b32 s37, v20, 3 +; VI-NEXT: v_readlane_b32 s36, v20, 2 +; VI-NEXT: v_readlane_b32 s35, v20, 1 +; VI-NEXT: v_readlane_b32 s34, v20, 0 ; VI-NEXT: s_xor_saveexec_b64 
s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -217764,43 +220024,57 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_writelane_b32 v63, s64, 16 -; GFX9-NEXT: v_writelane_b32 v63, s65, 17 -; GFX9-NEXT: v_writelane_b32 v63, s66, 18 -; GFX9-NEXT: v_writelane_b32 v63, s67, 19 -; GFX9-NEXT: v_writelane_b32 v63, s68, 20 -; GFX9-NEXT: v_writelane_b32 v63, s69, 21 -; GFX9-NEXT: v_writelane_b32 v63, s70, 22 -; GFX9-NEXT: v_writelane_b32 v63, s71, 23 -; GFX9-NEXT: v_writelane_b32 v63, s80, 24 -; GFX9-NEXT: v_writelane_b32 v63, s81, 25 -; GFX9-NEXT: v_writelane_b32 v63, s82, 26 -; GFX9-NEXT: v_writelane_b32 v63, s83, 27 -; GFX9-NEXT: v_writelane_b32 v63, s84, 28 -; GFX9-NEXT: v_writelane_b32 v63, s85, 29 -; GFX9-NEXT: v_writelane_b32 v63, s86, 30 -; GFX9-NEXT: v_writelane_b32 v63, s87, 31 -; GFX9-NEXT: v_writelane_b32 v63, s96, 32 -; GFX9-NEXT: v_writelane_b32 v63, s97, 33 -; GFX9-NEXT: v_writelane_b32 v63, s98, 34 +; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s64, 14 +; GFX9-NEXT: v_writelane_b32 v63, s65, 15 +; GFX9-NEXT: v_writelane_b32 v63, s66, 16 +; GFX9-NEXT: v_writelane_b32 v63, s67, 17 +; GFX9-NEXT: v_writelane_b32 v63, s68, 18 
+; GFX9-NEXT: v_writelane_b32 v63, s69, 19 +; GFX9-NEXT: v_writelane_b32 v63, s70, 20 +; GFX9-NEXT: v_writelane_b32 v63, s71, 21 +; GFX9-NEXT: v_writelane_b32 v63, s80, 22 +; GFX9-NEXT: v_writelane_b32 v63, s81, 23 +; GFX9-NEXT: v_writelane_b32 v63, s82, 24 +; GFX9-NEXT: v_writelane_b32 v63, s83, 25 +; GFX9-NEXT: v_writelane_b32 v63, s84, 26 +; GFX9-NEXT: v_writelane_b32 v63, s85, 27 +; GFX9-NEXT: v_writelane_b32 v63, s86, 28 +; GFX9-NEXT: v_writelane_b32 v63, s87, 29 +; GFX9-NEXT: v_writelane_b32 v63, s96, 30 +; GFX9-NEXT: v_writelane_b32 v63, s97, 31 +; GFX9-NEXT: v_writelane_b32 v63, s98, 32 +; GFX9-NEXT: v_writelane_b32 v63, s99, 33 +; GFX9-NEXT: v_writelane_b32 v63, s30, 34 +; GFX9-NEXT: v_writelane_b32 v63, s31, 35 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GFX9-NEXT: v_writelane_b32 v63, s99, 35 ; GFX9-NEXT: v_readfirstlane_b32 s44, v3 ; GFX9-NEXT: v_readfirstlane_b32 s45, v4 ; GFX9-NEXT: v_readfirstlane_b32 s42, v5 @@ -217820,20 +220094,6 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[46:47], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, 
s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX9-NEXT: s_cbranch_scc0 .LBB99_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false @@ -218682,42 +220942,42 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX9-NEXT: v_or_b32_sdwa v25, v25, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_lshlrev_b32_e32 v30, 8, v44 ; GFX9-NEXT: v_or_b32_sdwa v26, v26, v30 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_readlane_b32 s99, v63, 35 -; GFX9-NEXT: v_readlane_b32 s98, v63, 34 -; GFX9-NEXT: v_readlane_b32 s97, v63, 33 -; GFX9-NEXT: v_readlane_b32 s96, v63, 32 -; GFX9-NEXT: v_readlane_b32 s87, v63, 31 -; GFX9-NEXT: v_readlane_b32 s86, v63, 30 -; GFX9-NEXT: v_readlane_b32 s85, v63, 29 -; GFX9-NEXT: v_readlane_b32 s84, v63, 28 -; GFX9-NEXT: v_readlane_b32 s83, v63, 27 -; GFX9-NEXT: v_readlane_b32 s82, v63, 26 -; GFX9-NEXT: v_readlane_b32 s81, v63, 25 -; GFX9-NEXT: v_readlane_b32 s80, v63, 24 -; GFX9-NEXT: v_readlane_b32 s71, v63, 23 -; GFX9-NEXT: v_readlane_b32 s70, v63, 22 -; GFX9-NEXT: v_readlane_b32 s69, v63, 21 -; GFX9-NEXT: v_readlane_b32 s68, v63, 20 -; GFX9-NEXT: v_readlane_b32 s67, v63, 19 -; GFX9-NEXT: v_readlane_b32 s66, v63, 18 -; GFX9-NEXT: v_readlane_b32 s65, v63, 17 -; GFX9-NEXT: v_readlane_b32 s64, v63, 16 -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: 
v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 34 +; GFX9-NEXT: v_readlane_b32 s31, v63, 35 +; GFX9-NEXT: v_readlane_b32 s99, v63, 33 +; GFX9-NEXT: v_readlane_b32 s98, v63, 32 +; GFX9-NEXT: v_readlane_b32 s97, v63, 31 +; GFX9-NEXT: v_readlane_b32 s96, v63, 30 +; GFX9-NEXT: v_readlane_b32 s87, v63, 29 +; GFX9-NEXT: v_readlane_b32 s86, v63, 28 +; GFX9-NEXT: v_readlane_b32 s85, v63, 27 +; GFX9-NEXT: v_readlane_b32 s84, v63, 26 +; GFX9-NEXT: v_readlane_b32 s83, v63, 25 +; GFX9-NEXT: v_readlane_b32 s82, v63, 24 +; GFX9-NEXT: v_readlane_b32 s81, v63, 23 +; GFX9-NEXT: v_readlane_b32 s80, v63, 22 +; GFX9-NEXT: v_readlane_b32 s71, v63, 21 +; GFX9-NEXT: v_readlane_b32 s70, v63, 20 +; GFX9-NEXT: v_readlane_b32 s69, v63, 19 +; GFX9-NEXT: v_readlane_b32 s68, v63, 18 +; GFX9-NEXT: v_readlane_b32 s67, v63, 17 +; GFX9-NEXT: v_readlane_b32 s66, v63, 16 +; GFX9-NEXT: v_readlane_b32 s65, v63, 15 +; GFX9-NEXT: v_readlane_b32 s64, v63, 14 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v15, 8, v15 ; GFX9-NEXT: v_or_b32_sdwa 
v15, v38, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -219070,90 +221330,111 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:76 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:88 ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v75, s30, 0 -; GFX11-NEXT: v_writelane_b32 v76, s96, 0 -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 -; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_writelane_b32 v75, s31, 1 -; GFX11-NEXT: v_writelane_b32 v76, s97, 1 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v4 -; GFX11-NEXT: v_readfirstlane_b32 s12, v5 -; GFX11-NEXT: v_writelane_b32 v75, s34, 2 -; GFX11-NEXT: v_writelane_b32 v76, s98, 2 -; GFX11-NEXT: v_readfirstlane_b32 s13, v6 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v8 -; GFX11-NEXT: v_writelane_b32 v75, s35, 3 -; GFX11-NEXT: v_writelane_b32 v76, s99, 3 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_writelane_b32 v75, s36, 4 -; GFX11-NEXT: v_writelane_b32 v76, s100, 4 -; GFX11-NEXT: v_readfirstlane_b32 s7, v12 -; GFX11-NEXT: v_readfirstlane_b32 s4, v13 -; GFX11-NEXT: v_readfirstlane_b32 s5, v14 -; GFX11-NEXT: v_writelane_b32 v75, s37, 5 -; GFX11-NEXT: v_writelane_b32 v76, s101, 5 -; GFX11-NEXT: s_mov_b32 s99, 0 -; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: s_clause 0x12 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: 
scratch_store_b32 off, v41, s32 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s32 -; GFX11-NEXT: v_writelane_b32 v75, s38, 6 -; GFX11-NEXT: v_writelane_b32 v76, s102, 6 +; GFX11-NEXT: v_writelane_b32 v75, s34, 0 +; GFX11-NEXT: v_writelane_b32 v75, s35, 1 +; GFX11-NEXT: v_writelane_b32 v75, s36, 2 +; GFX11-NEXT: v_writelane_b32 v75, s37, 3 +; GFX11-NEXT: v_writelane_b32 v75, s38, 4 +; GFX11-NEXT: v_writelane_b32 v75, s39, 5 +; GFX11-NEXT: v_writelane_b32 v75, s48, 6 +; GFX11-NEXT: v_writelane_b32 v75, s49, 7 +; 
GFX11-NEXT: v_writelane_b32 v75, s50, 8 +; GFX11-NEXT: v_writelane_b32 v75, s51, 9 +; GFX11-NEXT: v_writelane_b32 v75, s52, 10 +; GFX11-NEXT: v_writelane_b32 v75, s53, 11 +; GFX11-NEXT: v_writelane_b32 v75, s54, 12 +; GFX11-NEXT: v_writelane_b32 v75, s55, 13 +; GFX11-NEXT: v_writelane_b32 v75, s64, 14 +; GFX11-NEXT: v_writelane_b32 v75, s65, 15 +; GFX11-NEXT: v_writelane_b32 v75, s66, 16 +; GFX11-NEXT: v_writelane_b32 v75, s67, 17 +; GFX11-NEXT: v_writelane_b32 v75, s68, 18 +; GFX11-NEXT: v_writelane_b32 v75, s69, 19 +; GFX11-NEXT: v_writelane_b32 v75, s70, 20 +; GFX11-NEXT: v_writelane_b32 v75, s71, 21 +; GFX11-NEXT: v_writelane_b32 v75, s80, 22 +; GFX11-NEXT: v_writelane_b32 v75, s81, 23 +; GFX11-NEXT: v_writelane_b32 v75, s82, 24 +; GFX11-NEXT: v_writelane_b32 v75, s83, 25 +; GFX11-NEXT: v_writelane_b32 v75, s84, 26 +; GFX11-NEXT: v_writelane_b32 v75, s85, 27 +; GFX11-NEXT: v_writelane_b32 v75, s86, 28 +; GFX11-NEXT: v_writelane_b32 v75, s87, 29 +; GFX11-NEXT: v_writelane_b32 v75, s96, 30 +; GFX11-NEXT: v_writelane_b32 v75, s97, 31 +; GFX11-NEXT: v_writelane_b32 v76, s98, 0 +; GFX11-NEXT: v_writelane_b32 v76, s99, 1 +; GFX11-NEXT: v_writelane_b32 v76, s100, 2 +; GFX11-NEXT: v_writelane_b32 v76, s101, 3 +; GFX11-NEXT: v_writelane_b32 v76, s102, 4 +; GFX11-NEXT: v_writelane_b32 v76, s103, 5 +; GFX11-NEXT: v_writelane_b32 v76, s104, 6 +; GFX11-NEXT: v_writelane_b32 v76, s30, 7 +; GFX11-NEXT: v_writelane_b32 v76, s31, 8 +; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v15 +; GFX11-NEXT: v_readfirstlane_b32 s40, v1 +; GFX11-NEXT: v_readfirstlane_b32 s41, v2 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v4 +; GFX11-NEXT: v_readfirstlane_b32 s12, v5 +; GFX11-NEXT: v_readfirstlane_b32 s13, v6 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v8 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v10 +; GFX11-NEXT: v_readfirstlane_b32 s6, v11 +; GFX11-NEXT: v_readfirstlane_b32 
s7, v12 +; GFX11-NEXT: v_readfirstlane_b32 s4, v13 +; GFX11-NEXT: v_readfirstlane_b32 s5, v14 +; GFX11-NEXT: s_mov_b32 s99, 0 +; GFX11-NEXT: s_and_b32 s42, vcc_lo, exec_lo ; GFX11-NEXT: ; implicit-def: $vgpr78 : SGPR spill to VGPR lane ; GFX11-NEXT: ; implicit-def: $vgpr77 : SGPR spill to VGPR lane -; GFX11-NEXT: v_writelane_b32 v75, s39, 7 -; GFX11-NEXT: v_writelane_b32 v76, s103, 7 -; GFX11-NEXT: v_writelane_b32 v75, s48, 8 -; GFX11-NEXT: v_writelane_b32 v76, s104, 8 -; GFX11-NEXT: v_writelane_b32 v75, s49, 9 -; GFX11-NEXT: v_writelane_b32 v75, s50, 10 -; GFX11-NEXT: v_writelane_b32 v75, s51, 11 -; GFX11-NEXT: v_writelane_b32 v75, s52, 12 -; GFX11-NEXT: v_writelane_b32 v75, s53, 13 -; GFX11-NEXT: v_writelane_b32 v75, s54, 14 -; GFX11-NEXT: v_writelane_b32 v75, s55, 15 -; GFX11-NEXT: v_writelane_b32 v75, s64, 16 -; GFX11-NEXT: v_writelane_b32 v75, s65, 17 -; GFX11-NEXT: v_writelane_b32 v75, s66, 18 -; GFX11-NEXT: v_writelane_b32 v75, s67, 19 -; GFX11-NEXT: v_writelane_b32 v75, s68, 20 -; GFX11-NEXT: v_writelane_b32 v75, s69, 21 -; GFX11-NEXT: v_writelane_b32 v75, s70, 22 -; GFX11-NEXT: v_writelane_b32 v75, s71, 23 -; GFX11-NEXT: v_writelane_b32 v75, s80, 24 -; GFX11-NEXT: v_writelane_b32 v75, s81, 25 -; GFX11-NEXT: v_writelane_b32 v75, s82, 26 -; GFX11-NEXT: v_writelane_b32 v75, s83, 27 -; GFX11-NEXT: v_writelane_b32 v75, s84, 28 -; GFX11-NEXT: v_writelane_b32 v75, s85, 29 -; GFX11-NEXT: v_writelane_b32 v75, s86, 30 -; GFX11-NEXT: v_writelane_b32 v75, s87, 31 ; GFX11-NEXT: s_cbranch_scc0 .LBB99_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 16 @@ -220042,47 +222323,47 @@ define inreg <128 x i8> @bitcast_v64i16_to_v128i8_scalar(<64 x i16> inreg %a, i3 ; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:64 ; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:72 -; GFX11-NEXT: v_readlane_b32 s104, v76, 8 -; GFX11-NEXT: v_readlane_b32 s103, v76, 7 -; GFX11-NEXT: v_readlane_b32 s102, v76, 6 
-; GFX11-NEXT: v_readlane_b32 s101, v76, 5 -; GFX11-NEXT: v_readlane_b32 s100, v76, 4 -; GFX11-NEXT: v_readlane_b32 s99, v76, 3 -; GFX11-NEXT: v_readlane_b32 s98, v76, 2 -; GFX11-NEXT: v_readlane_b32 s97, v76, 1 -; GFX11-NEXT: v_readlane_b32 s96, v76, 0 -; GFX11-NEXT: v_readlane_b32 s87, v75, 31 -; GFX11-NEXT: v_readlane_b32 s86, v75, 30 -; GFX11-NEXT: v_readlane_b32 s85, v75, 29 -; GFX11-NEXT: v_readlane_b32 s84, v75, 28 -; GFX11-NEXT: v_readlane_b32 s83, v75, 27 -; GFX11-NEXT: v_readlane_b32 s82, v75, 26 -; GFX11-NEXT: v_readlane_b32 s81, v75, 25 -; GFX11-NEXT: v_readlane_b32 s80, v75, 24 -; GFX11-NEXT: v_readlane_b32 s71, v75, 23 -; GFX11-NEXT: v_readlane_b32 s70, v75, 22 -; GFX11-NEXT: v_readlane_b32 s69, v75, 21 -; GFX11-NEXT: v_readlane_b32 s68, v75, 20 -; GFX11-NEXT: v_readlane_b32 s67, v75, 19 -; GFX11-NEXT: v_readlane_b32 s66, v75, 18 -; GFX11-NEXT: v_readlane_b32 s65, v75, 17 -; GFX11-NEXT: v_readlane_b32 s64, v75, 16 -; GFX11-NEXT: v_readlane_b32 s55, v75, 15 -; GFX11-NEXT: v_readlane_b32 s54, v75, 14 -; GFX11-NEXT: v_readlane_b32 s53, v75, 13 -; GFX11-NEXT: v_readlane_b32 s52, v75, 12 -; GFX11-NEXT: v_readlane_b32 s51, v75, 11 -; GFX11-NEXT: v_readlane_b32 s50, v75, 10 -; GFX11-NEXT: v_readlane_b32 s49, v75, 9 -; GFX11-NEXT: v_readlane_b32 s48, v75, 8 -; GFX11-NEXT: v_readlane_b32 s39, v75, 7 -; GFX11-NEXT: v_readlane_b32 s38, v75, 6 -; GFX11-NEXT: v_readlane_b32 s37, v75, 5 -; GFX11-NEXT: v_readlane_b32 s36, v75, 4 -; GFX11-NEXT: v_readlane_b32 s35, v75, 3 -; GFX11-NEXT: v_readlane_b32 s34, v75, 2 -; GFX11-NEXT: v_readlane_b32 s31, v75, 1 -; GFX11-NEXT: v_readlane_b32 s30, v75, 0 +; GFX11-NEXT: v_readlane_b32 s30, v76, 7 +; GFX11-NEXT: v_readlane_b32 s31, v76, 8 +; GFX11-NEXT: v_readlane_b32 s104, v76, 6 +; GFX11-NEXT: v_readlane_b32 s103, v76, 5 +; GFX11-NEXT: v_readlane_b32 s102, v76, 4 +; GFX11-NEXT: v_readlane_b32 s101, v76, 3 +; GFX11-NEXT: v_readlane_b32 s100, v76, 2 +; GFX11-NEXT: v_readlane_b32 s99, v76, 1 +; GFX11-NEXT: v_readlane_b32 s98, 
v76, 0 +; GFX11-NEXT: v_readlane_b32 s97, v75, 31 +; GFX11-NEXT: v_readlane_b32 s96, v75, 30 +; GFX11-NEXT: v_readlane_b32 s87, v75, 29 +; GFX11-NEXT: v_readlane_b32 s86, v75, 28 +; GFX11-NEXT: v_readlane_b32 s85, v75, 27 +; GFX11-NEXT: v_readlane_b32 s84, v75, 26 +; GFX11-NEXT: v_readlane_b32 s83, v75, 25 +; GFX11-NEXT: v_readlane_b32 s82, v75, 24 +; GFX11-NEXT: v_readlane_b32 s81, v75, 23 +; GFX11-NEXT: v_readlane_b32 s80, v75, 22 +; GFX11-NEXT: v_readlane_b32 s71, v75, 21 +; GFX11-NEXT: v_readlane_b32 s70, v75, 20 +; GFX11-NEXT: v_readlane_b32 s69, v75, 19 +; GFX11-NEXT: v_readlane_b32 s68, v75, 18 +; GFX11-NEXT: v_readlane_b32 s67, v75, 17 +; GFX11-NEXT: v_readlane_b32 s66, v75, 16 +; GFX11-NEXT: v_readlane_b32 s65, v75, 15 +; GFX11-NEXT: v_readlane_b32 s64, v75, 14 +; GFX11-NEXT: v_readlane_b32 s55, v75, 13 +; GFX11-NEXT: v_readlane_b32 s54, v75, 12 +; GFX11-NEXT: v_readlane_b32 s53, v75, 11 +; GFX11-NEXT: v_readlane_b32 s52, v75, 10 +; GFX11-NEXT: v_readlane_b32 s51, v75, 9 +; GFX11-NEXT: v_readlane_b32 s50, v75, 8 +; GFX11-NEXT: v_readlane_b32 s49, v75, 7 +; GFX11-NEXT: v_readlane_b32 s48, v75, 6 +; GFX11-NEXT: v_readlane_b32 s39, v75, 5 +; GFX11-NEXT: v_readlane_b32 s38, v75, 4 +; GFX11-NEXT: v_readlane_b32 s37, v75, 3 +; GFX11-NEXT: v_readlane_b32 s36, v75, 2 +; GFX11-NEXT: v_readlane_b32 s35, v75, 1 +; GFX11-NEXT: v_readlane_b32 s34, v75, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:76 @@ -222643,20 +224924,35 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:8 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v48, v16 ; GFX11-TRUE16-NEXT: s_clause 0x1 @@ -224991,9 +227287,11 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_writelane_b32 v42, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_writelane_b32 v42, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: 
v_mov_b32_e32 v31, v17 ; VI-NEXT: v_mov_b32_e32 v30, v16 ; VI-NEXT: v_mov_b32_e32 v29, v15 @@ -225013,8 +227311,6 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB101_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB101_4 @@ -225619,9 +227915,9 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; VI-NEXT: .LBB101_5: ; %end ; VI-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: v_mov_b32_e32 v18, v32 ; VI-NEXT: v_readlane_b32 s31, v42, 1 -; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -225634,9 +227930,12 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_writelane_b32 v43, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_mov_b32_e32 v31, v17 ; GFX9-NEXT: v_mov_b32_e32 v30, v16 ; GFX9-NEXT: v_mov_b32_e32 v29, v15 @@ -225656,9 
+227955,6 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB101_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB101_4 @@ -226297,9 +228593,9 @@ define inreg <64 x half> @bitcast_v64bf16_to_v64f16_scalar(<64 x bfloat> inreg % ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: v_mov_b32_e32 v18, v32 ; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -235210,9 +237506,11 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_writelane_b32 v42, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_writelane_b32 v42, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; VI-NEXT: v_mov_b32_e32 v31, v17 ; VI-NEXT: v_mov_b32_e32 v30, v16 ; VI-NEXT: v_mov_b32_e32 v29, v15 @@ -235232,8 +237530,6 @@ define inreg <64 x i16> 
@bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB105_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB105_4 @@ -235838,9 +238134,9 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; VI-NEXT: .LBB105_5: ; %end ; VI-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: v_mov_b32_e32 v18, v32 ; VI-NEXT: v_readlane_b32 s31, v42, 1 -; VI-NEXT: v_readlane_b32 s30, v42, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -235853,9 +238149,12 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_writelane_b32 v43, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; GFX9-NEXT: v_mov_b32_e32 v31, v17 ; GFX9-NEXT: v_mov_b32_e32 v30, v16 ; GFX9-NEXT: v_mov_b32_e32 v29, v15 @@ -235875,9 +238174,6 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: 
s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB105_4 @@ -236484,9 +238780,9 @@ define inreg <64 x i16> @bitcast_v64bf16_to_v64i16_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: v_mov_b32_e32 v18, v32 ; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -238840,6 +241136,43 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: s_waitcnt expcnt(1) +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 
v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s96, 30 +; SI-NEXT: v_writelane_b32 v40, s97, 31 +; SI-NEXT: v_writelane_b32 v40, s98, 32 +; SI-NEXT: v_writelane_b32 v40, s99, 33 +; SI-NEXT: v_writelane_b32 v40, s30, 34 +; SI-NEXT: v_writelane_b32 v40, s31, 35 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:72 ; SI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:68 @@ -238848,39 +241181,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:52 ; SI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:48 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: 
v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 -; SI-NEXT: v_writelane_b32 v40, s86, 30 -; SI-NEXT: v_writelane_b32 v40, s87, 31 ; SI-NEXT: ; implicit-def: $vgpr41 : SGPR spill to VGPR lane ; SI-NEXT: s_mov_b32 s60, s16 ; SI-NEXT: s_waitcnt expcnt(0) @@ -238920,9 +241220,32 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: v_writelane_b32 v41, s34, 19 ; SI-NEXT: v_readfirstlane_b32 s36, v10 ; SI-NEXT: v_writelane_b32 v41, s35, 20 -; SI-NEXT: v_writelane_b32 v40, s96, 32 ; SI-NEXT: v_readfirstlane_b32 s37, v9 ; SI-NEXT: v_writelane_b32 v41, s36, 21 +; SI-NEXT: v_readfirstlane_b32 s38, v12 +; SI-NEXT: v_writelane_b32 v41, s37, 22 +; SI-NEXT: v_readfirstlane_b32 s14, v30 +; SI-NEXT: v_readfirstlane_b32 s15, v29 +; SI-NEXT: v_readfirstlane_b32 s12, v28 +; SI-NEXT: v_readfirstlane_b32 s13, v27 +; SI-NEXT: v_readfirstlane_b32 s10, v26 +; SI-NEXT: v_readfirstlane_b32 s11, v25 +; SI-NEXT: v_readfirstlane_b32 s8, v24 +; SI-NEXT: v_readfirstlane_b32 s9, v23 +; SI-NEXT: v_readfirstlane_b32 s88, v22 +; SI-NEXT: v_readfirstlane_b32 s29, v21 +; SI-NEXT: v_readfirstlane_b32 s79, v20 +; SI-NEXT: v_readfirstlane_b32 s27, v19 +; SI-NEXT: v_readfirstlane_b32 s78, v18 +; SI-NEXT: v_readfirstlane_b32 s25, v17 +; SI-NEXT: v_readfirstlane_b32 s77, v16 +; SI-NEXT: v_readfirstlane_b32 s23, v15 +; SI-NEXT: v_readfirstlane_b32 s39, v14 +; SI-NEXT: 
v_readfirstlane_b32 s21, v13 +; SI-NEXT: v_readfirstlane_b32 s19, v11 +; SI-NEXT: v_readfirstlane_b32 s18, v1 +; SI-NEXT: v_writelane_b32 v41, s38, 23 +; SI-NEXT: v_writelane_b32 v41, s39, 24 ; SI-NEXT: s_waitcnt vmcnt(7) ; SI-NEXT: v_readfirstlane_b32 s62, v31 ; SI-NEXT: s_waitcnt vmcnt(6) @@ -238951,33 +241274,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 ; SI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 ; SI-NEXT: buffer_load_dword v37, off, s[0:3], s32 -; SI-NEXT: v_writelane_b32 v40, s97, 33 -; SI-NEXT: v_readfirstlane_b32 s38, v12 -; SI-NEXT: v_writelane_b32 v41, s37, 22 -; SI-NEXT: v_writelane_b32 v40, s98, 34 -; SI-NEXT: v_readfirstlane_b32 s14, v30 -; SI-NEXT: v_readfirstlane_b32 s15, v29 -; SI-NEXT: v_readfirstlane_b32 s12, v28 -; SI-NEXT: v_readfirstlane_b32 s13, v27 -; SI-NEXT: v_readfirstlane_b32 s10, v26 -; SI-NEXT: v_readfirstlane_b32 s11, v25 -; SI-NEXT: v_readfirstlane_b32 s8, v24 -; SI-NEXT: v_readfirstlane_b32 s9, v23 -; SI-NEXT: v_readfirstlane_b32 s88, v22 -; SI-NEXT: v_readfirstlane_b32 s29, v21 -; SI-NEXT: v_readfirstlane_b32 s79, v20 -; SI-NEXT: v_readfirstlane_b32 s27, v19 -; SI-NEXT: v_readfirstlane_b32 s78, v18 -; SI-NEXT: v_readfirstlane_b32 s25, v17 -; SI-NEXT: v_readfirstlane_b32 s77, v16 -; SI-NEXT: v_readfirstlane_b32 s23, v15 -; SI-NEXT: v_readfirstlane_b32 s39, v14 -; SI-NEXT: v_readfirstlane_b32 s21, v13 -; SI-NEXT: v_readfirstlane_b32 s19, v11 -; SI-NEXT: v_readfirstlane_b32 s18, v1 -; SI-NEXT: v_writelane_b32 v41, s38, 23 -; SI-NEXT: v_writelane_b32 v40, s99, 35 -; SI-NEXT: v_writelane_b32 v41, s39, 24 ; SI-NEXT: s_waitcnt vmcnt(12) ; SI-NEXT: v_readfirstlane_b32 s58, v31 ; SI-NEXT: s_waitcnt vmcnt(11) @@ -239672,43 +241968,43 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; SI-NEXT: v_mul_f32_e64 v2, 1.0, s5 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, 16 ; SI-NEXT: v_add_i32_e32 v0, vcc, 
0x7c, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 34 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v40, 35 -; SI-NEXT: v_readlane_b32 s98, v40, 34 -; SI-NEXT: v_readlane_b32 s97, v40, 33 -; SI-NEXT: v_readlane_b32 s96, v40, 32 -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 35 +; SI-NEXT: v_readlane_b32 s99, v40, 33 +; SI-NEXT: v_readlane_b32 s98, v40, 32 +; SI-NEXT: v_readlane_b32 s97, v40, 31 +; SI-NEXT: v_readlane_b32 s96, v40, 30 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 
s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -239722,14 +242018,15 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v32, s30, 0 -; VI-NEXT: v_writelane_b32 v32, s31, 1 -; VI-NEXT: v_writelane_b32 v32, s34, 2 -; VI-NEXT: v_writelane_b32 v32, s35, 3 -; VI-NEXT: v_writelane_b32 v32, s36, 4 -; VI-NEXT: v_writelane_b32 v32, s37, 5 +; VI-NEXT: v_writelane_b32 v32, s34, 0 +; VI-NEXT: v_writelane_b32 v32, s35, 1 +; VI-NEXT: v_writelane_b32 v32, s36, 2 +; VI-NEXT: v_writelane_b32 v32, s37, 3 +; VI-NEXT: v_writelane_b32 v32, s38, 4 +; VI-NEXT: v_writelane_b32 v32, s39, 5 +; VI-NEXT: v_writelane_b32 v32, s30, 6 +; VI-NEXT: v_writelane_b32 v32, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; 
VI-NEXT: v_writelane_b32 v32, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s47, v2 ; VI-NEXT: v_readfirstlane_b32 s46, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 @@ -239749,7 +242046,6 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v1 -; VI-NEXT: v_writelane_b32 v32, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB107_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB107_3 @@ -239915,6 +242211,7 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: s_add_i32 s46, s46, 0x30000 ; VI-NEXT: s_add_i32 s47, s4, 0x30000 ; VI-NEXT: .LBB107_3: ; %end +; VI-NEXT: v_readlane_b32 s30, v32, 6 ; VI-NEXT: v_mov_b32_e32 v0, s16 ; VI-NEXT: v_mov_b32_e32 v1, s17 ; VI-NEXT: v_mov_b32_e32 v2, s18 @@ -239947,14 +242244,13 @@ define inreg <64 x bfloat> @bitcast_v64i16_to_v64bf16_scalar(<64 x i16> inreg %a ; VI-NEXT: v_mov_b32_e32 v29, s10 ; VI-NEXT: v_mov_b32_e32 v30, s9 ; VI-NEXT: v_mov_b32_e32 v31, s8 -; VI-NEXT: v_readlane_b32 s39, v32, 7 -; VI-NEXT: v_readlane_b32 s38, v32, 6 -; VI-NEXT: v_readlane_b32 s37, v32, 5 -; VI-NEXT: v_readlane_b32 s36, v32, 4 -; VI-NEXT: v_readlane_b32 s35, v32, 3 -; VI-NEXT: v_readlane_b32 s34, v32, 2 -; VI-NEXT: v_readlane_b32 s31, v32, 1 -; VI-NEXT: v_readlane_b32 s30, v32, 0 +; VI-NEXT: v_readlane_b32 s31, v32, 7 +; VI-NEXT: v_readlane_b32 s39, v32, 5 +; VI-NEXT: v_readlane_b32 s38, v32, 4 +; VI-NEXT: v_readlane_b32 s37, v32, 3 +; VI-NEXT: v_readlane_b32 s36, v32, 2 +; VI-NEXT: v_readlane_b32 s35, v32, 1 +; VI-NEXT: v_readlane_b32 s34, v32, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -244357,14 +246653,15 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword 
v32, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v32, s30, 0 -; VI-NEXT: v_writelane_b32 v32, s31, 1 -; VI-NEXT: v_writelane_b32 v32, s34, 2 -; VI-NEXT: v_writelane_b32 v32, s35, 3 -; VI-NEXT: v_writelane_b32 v32, s36, 4 -; VI-NEXT: v_writelane_b32 v32, s37, 5 +; VI-NEXT: v_writelane_b32 v32, s34, 0 +; VI-NEXT: v_writelane_b32 v32, s35, 1 +; VI-NEXT: v_writelane_b32 v32, s36, 2 +; VI-NEXT: v_writelane_b32 v32, s37, 3 +; VI-NEXT: v_writelane_b32 v32, s38, 4 +; VI-NEXT: v_writelane_b32 v32, s39, 5 +; VI-NEXT: v_writelane_b32 v32, s30, 6 +; VI-NEXT: v_writelane_b32 v32, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 -; VI-NEXT: v_writelane_b32 v32, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s47, v2 ; VI-NEXT: v_readfirstlane_b32 s46, v3 ; VI-NEXT: v_readfirstlane_b32 s45, v4 @@ -244384,7 +246681,6 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v1 -; VI-NEXT: v_writelane_b32 v32, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB111_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB111_3 @@ -244550,6 +246846,7 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: s_add_i32 s46, s46, 0x30000 ; VI-NEXT: s_add_i32 s47, s4, 0x30000 ; VI-NEXT: .LBB111_3: ; %end +; VI-NEXT: v_readlane_b32 s30, v32, 6 ; VI-NEXT: v_mov_b32_e32 v0, s16 ; VI-NEXT: v_mov_b32_e32 v1, s17 ; VI-NEXT: v_mov_b32_e32 v2, s18 @@ -244582,14 +246879,13 @@ define inreg <64 x half> @bitcast_v64i16_to_v64f16_scalar(<64 x i16> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v29, s10 ; VI-NEXT: v_mov_b32_e32 v30, s9 ; VI-NEXT: v_mov_b32_e32 v31, s8 -; VI-NEXT: v_readlane_b32 s39, v32, 7 -; VI-NEXT: v_readlane_b32 s38, v32, 6 -; VI-NEXT: v_readlane_b32 s37, v32, 5 -; VI-NEXT: v_readlane_b32 s36, v32, 4 -; VI-NEXT: v_readlane_b32 s35, v32, 3 -; VI-NEXT: v_readlane_b32 s34, v32, 2 
-; VI-NEXT: v_readlane_b32 s31, v32, 1 -; VI-NEXT: v_readlane_b32 s30, v32, 0 +; VI-NEXT: v_readlane_b32 s31, v32, 7 +; VI-NEXT: v_readlane_b32 s39, v32, 5 +; VI-NEXT: v_readlane_b32 s38, v32, 4 +; VI-NEXT: v_readlane_b32 s37, v32, 3 +; VI-NEXT: v_readlane_b32 s36, v32, 2 +; VI-NEXT: v_readlane_b32 s35, v32, 1 +; VI-NEXT: v_readlane_b32 s34, v32, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll index 01e397d629ea9..a48eb27460f7d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll @@ -30553,14 +30553,14 @@ define <32 x i8> @bitcast_v16i16_to_v32i8(<16 x i16> %a, i32 %b) { ; SI-LABEL: bitcast_v16i16_to_v32i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mov_b32_e32 v48, v15 -; SI-NEXT: v_mov_b32_e32 v49, v11 -; SI-NEXT: v_mov_b32_e32 v50, v7 -; SI-NEXT: v_mov_b32_e32 v51, v3 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_mov_b32_e32 v48, v15 +; SI-NEXT: v_mov_b32_e32 v49, v11 +; SI-NEXT: v_mov_b32_e32 v50, v7 +; SI-NEXT: v_mov_b32_e32 v51, v3 ; SI-NEXT: v_mov_b32_e32 v32, v14 ; SI-NEXT: v_mov_b32_e32 v37, v12 ; SI-NEXT: v_mov_b32_e32 v33, v10 @@ -40102,11 +40102,11 @@ define inreg <32 x i8> @bitcast_v16bf16_to_v32i8_scalar(<16 x bfloat> inreg %a, ; SI-LABEL: bitcast_v16bf16_to_v32i8_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte 
Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v24, 1.0, s17 ; SI-NEXT: v_mul_f32_e64 v32, 1.0, s16 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll index 9041f64cb17fb..7adaa6d3c3651 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll @@ -15733,6 +15733,10 @@ define <40 x i8> @bitcast_v20i16_to_v40i8(<20 x i16> %a, i32 %b) { ; VI-LABEL: bitcast_v20i16_to_v40i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v10 ; VI-NEXT: v_lshrrev_b32_e32 v20, 16, v9 @@ -15744,10 +15748,6 @@ define <40 x i8> @bitcast_v20i16_to_v40i8(<20 x i16> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v3 ; VI-NEXT: v_lshrrev_b32_e32 v23, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v26, 16, v1 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr34 ; VI-NEXT: ; implicit-def: $vgpr40 ; VI-NEXT: ; implicit-def: $vgpr15 @@ -16525,18 +16525,18 @@ 
define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v8, s30, 0 -; SI-NEXT: v_writelane_b32 v8, s31, 1 -; SI-NEXT: v_writelane_b32 v8, s34, 2 -; SI-NEXT: v_writelane_b32 v8, s35, 3 -; SI-NEXT: v_writelane_b32 v8, s36, 4 -; SI-NEXT: v_writelane_b32 v8, s37, 5 -; SI-NEXT: v_writelane_b32 v8, s38, 6 -; SI-NEXT: v_writelane_b32 v8, s39, 7 -; SI-NEXT: v_writelane_b32 v8, s48, 8 -; SI-NEXT: v_writelane_b32 v8, s49, 9 +; SI-NEXT: v_writelane_b32 v8, s34, 0 +; SI-NEXT: v_writelane_b32 v8, s35, 1 +; SI-NEXT: v_writelane_b32 v8, s36, 2 +; SI-NEXT: v_writelane_b32 v8, s37, 3 +; SI-NEXT: v_writelane_b32 v8, s38, 4 +; SI-NEXT: v_writelane_b32 v8, s39, 5 +; SI-NEXT: v_writelane_b32 v8, s48, 6 +; SI-NEXT: v_writelane_b32 v8, s49, 7 +; SI-NEXT: v_writelane_b32 v8, s50, 8 +; SI-NEXT: v_writelane_b32 v8, s30, 9 +; SI-NEXT: v_writelane_b32 v8, s31, 10 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; SI-NEXT: v_writelane_b32 v8, s50, 10 ; SI-NEXT: v_readfirstlane_b32 s39, v6 ; SI-NEXT: v_readfirstlane_b32 s48, v5 ; SI-NEXT: v_readfirstlane_b32 s49, v4 @@ -16815,18 +16815,18 @@ define inreg <40 x i8> @bitcast_v20i16_to_v40i8_scalar(<20 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 36, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v8, 9 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s50, v8, 10 -; SI-NEXT: v_readlane_b32 s49, v8, 9 -; SI-NEXT: v_readlane_b32 s48, v8, 8 -; SI-NEXT: v_readlane_b32 s39, v8, 7 -; SI-NEXT: v_readlane_b32 s38, v8, 6 -; SI-NEXT: v_readlane_b32 s37, v8, 5 -; SI-NEXT: v_readlane_b32 s36, v8, 4 -; SI-NEXT: v_readlane_b32 s35, v8, 3 -; SI-NEXT: v_readlane_b32 s34, v8, 2 -; SI-NEXT: v_readlane_b32 s31, v8, 1 -; SI-NEXT: v_readlane_b32 s30, v8, 0 +; SI-NEXT: 
v_readlane_b32 s31, v8, 10 +; SI-NEXT: v_readlane_b32 s50, v8, 8 +; SI-NEXT: v_readlane_b32 s49, v8, 7 +; SI-NEXT: v_readlane_b32 s48, v8, 6 +; SI-NEXT: v_readlane_b32 s39, v8, 5 +; SI-NEXT: v_readlane_b32 s38, v8, 4 +; SI-NEXT: v_readlane_b32 s37, v8, 3 +; SI-NEXT: v_readlane_b32 s36, v8, 2 +; SI-NEXT: v_readlane_b32 s35, v8, 1 +; SI-NEXT: v_readlane_b32 s34, v8, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll index ee23420c2a662..de18eec1ccc79 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll @@ -6673,8 +6673,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -6992,8 +6992,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB23_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -7007,8 +7007,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; 
GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -7343,8 +7343,8 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB23_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -8062,8 +8062,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v16i32_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8080,6 +8078,8 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -8481,10 +8481,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v16i32_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; 
VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8501,6 +8497,10 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -8812,10 +8812,6 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v16i32_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8832,6 +8828,10 @@ define <64 x i8> @bitcast_v16i32_to_v64i8(<16 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed 
$vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -9707,40 +9707,40 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; SI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v4, s30, 0 -; SI-NEXT: v_writelane_b32 v4, s31, 1 -; SI-NEXT: v_writelane_b32 v4, s34, 2 -; SI-NEXT: v_writelane_b32 v4, s35, 3 -; SI-NEXT: v_writelane_b32 v4, s36, 4 -; SI-NEXT: v_writelane_b32 v4, s37, 5 -; SI-NEXT: v_writelane_b32 v4, s38, 6 -; SI-NEXT: v_writelane_b32 v4, s39, 7 -; SI-NEXT: v_writelane_b32 v4, s48, 8 -; SI-NEXT: v_writelane_b32 v4, s49, 9 -; SI-NEXT: v_writelane_b32 v4, s50, 10 -; SI-NEXT: v_writelane_b32 v4, s51, 11 -; SI-NEXT: v_writelane_b32 v4, s52, 12 -; SI-NEXT: v_writelane_b32 v4, s53, 13 -; SI-NEXT: v_writelane_b32 v4, s54, 14 -; SI-NEXT: v_writelane_b32 v4, s55, 15 -; SI-NEXT: v_writelane_b32 v4, s64, 16 -; SI-NEXT: v_writelane_b32 v4, s65, 17 -; SI-NEXT: v_writelane_b32 v4, s66, 18 -; SI-NEXT: v_writelane_b32 v4, s67, 19 -; SI-NEXT: v_writelane_b32 v4, s68, 20 -; SI-NEXT: v_writelane_b32 v4, s69, 21 -; SI-NEXT: v_writelane_b32 v4, s70, 22 -; SI-NEXT: v_writelane_b32 v4, s71, 23 -; SI-NEXT: v_writelane_b32 v4, s80, 24 -; SI-NEXT: v_writelane_b32 v4, s81, 25 -; SI-NEXT: v_writelane_b32 v4, s82, 26 -; SI-NEXT: v_writelane_b32 v4, s83, 27 +; SI-NEXT: v_writelane_b32 v4, s34, 0 +; SI-NEXT: v_writelane_b32 v4, s35, 1 +; SI-NEXT: v_writelane_b32 v4, s36, 2 +; SI-NEXT: v_writelane_b32 v4, s37, 3 +; SI-NEXT: v_writelane_b32 v4, s38, 4 +; SI-NEXT: v_writelane_b32 v4, s39, 5 +; SI-NEXT: v_writelane_b32 v4, s48, 6 +; SI-NEXT: v_writelane_b32 v4, s49, 7 +; SI-NEXT: v_writelane_b32 v4, s50, 8 +; SI-NEXT: v_writelane_b32 v4, s51, 9 +; SI-NEXT: v_writelane_b32 v4, s52, 10 +; SI-NEXT: v_writelane_b32 v4, s53, 11 +; SI-NEXT: 
v_writelane_b32 v4, s54, 12 +; SI-NEXT: v_writelane_b32 v4, s55, 13 +; SI-NEXT: v_writelane_b32 v4, s64, 14 +; SI-NEXT: v_writelane_b32 v4, s65, 15 +; SI-NEXT: v_writelane_b32 v4, s66, 16 +; SI-NEXT: v_writelane_b32 v4, s67, 17 +; SI-NEXT: v_writelane_b32 v4, s68, 18 +; SI-NEXT: v_writelane_b32 v4, s69, 19 +; SI-NEXT: v_writelane_b32 v4, s70, 20 +; SI-NEXT: v_writelane_b32 v4, s71, 21 +; SI-NEXT: v_writelane_b32 v4, s80, 22 +; SI-NEXT: v_writelane_b32 v4, s81, 23 +; SI-NEXT: v_writelane_b32 v4, s82, 24 +; SI-NEXT: v_writelane_b32 v4, s83, 25 +; SI-NEXT: v_writelane_b32 v4, s84, 26 +; SI-NEXT: v_writelane_b32 v4, s85, 27 +; SI-NEXT: v_writelane_b32 v4, s30, 28 +; SI-NEXT: v_writelane_b32 v4, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v4, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v4, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB25_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 24 @@ -10061,37 +10061,37 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v4, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v4, 29 -; SI-NEXT: v_readlane_b32 s84, v4, 28 -; SI-NEXT: v_readlane_b32 s83, v4, 27 -; SI-NEXT: v_readlane_b32 s82, v4, 26 -; SI-NEXT: v_readlane_b32 s81, v4, 25 -; SI-NEXT: v_readlane_b32 s80, v4, 24 -; SI-NEXT: v_readlane_b32 s71, v4, 23 -; SI-NEXT: v_readlane_b32 s70, v4, 22 -; SI-NEXT: v_readlane_b32 s69, v4, 21 -; SI-NEXT: v_readlane_b32 s68, v4, 20 -; SI-NEXT: v_readlane_b32 s67, v4, 19 -; SI-NEXT: v_readlane_b32 s66, v4, 18 -; SI-NEXT: v_readlane_b32 s65, v4, 17 -; SI-NEXT: v_readlane_b32 s64, v4, 16 -; SI-NEXT: v_readlane_b32 s55, v4, 15 -; SI-NEXT: v_readlane_b32 s54, v4, 14 -; 
SI-NEXT: v_readlane_b32 s53, v4, 13 -; SI-NEXT: v_readlane_b32 s52, v4, 12 -; SI-NEXT: v_readlane_b32 s51, v4, 11 -; SI-NEXT: v_readlane_b32 s50, v4, 10 -; SI-NEXT: v_readlane_b32 s49, v4, 9 -; SI-NEXT: v_readlane_b32 s48, v4, 8 -; SI-NEXT: v_readlane_b32 s39, v4, 7 -; SI-NEXT: v_readlane_b32 s38, v4, 6 -; SI-NEXT: v_readlane_b32 s37, v4, 5 -; SI-NEXT: v_readlane_b32 s36, v4, 4 -; SI-NEXT: v_readlane_b32 s35, v4, 3 -; SI-NEXT: v_readlane_b32 s34, v4, 2 -; SI-NEXT: v_readlane_b32 s31, v4, 1 -; SI-NEXT: v_readlane_b32 s30, v4, 0 +; SI-NEXT: v_readlane_b32 s31, v4, 29 +; SI-NEXT: v_readlane_b32 s85, v4, 27 +; SI-NEXT: v_readlane_b32 s84, v4, 26 +; SI-NEXT: v_readlane_b32 s83, v4, 25 +; SI-NEXT: v_readlane_b32 s82, v4, 24 +; SI-NEXT: v_readlane_b32 s81, v4, 23 +; SI-NEXT: v_readlane_b32 s80, v4, 22 +; SI-NEXT: v_readlane_b32 s71, v4, 21 +; SI-NEXT: v_readlane_b32 s70, v4, 20 +; SI-NEXT: v_readlane_b32 s69, v4, 19 +; SI-NEXT: v_readlane_b32 s68, v4, 18 +; SI-NEXT: v_readlane_b32 s67, v4, 17 +; SI-NEXT: v_readlane_b32 s66, v4, 16 +; SI-NEXT: v_readlane_b32 s65, v4, 15 +; SI-NEXT: v_readlane_b32 s64, v4, 14 +; SI-NEXT: v_readlane_b32 s55, v4, 13 +; SI-NEXT: v_readlane_b32 s54, v4, 12 +; SI-NEXT: v_readlane_b32 s53, v4, 11 +; SI-NEXT: v_readlane_b32 s52, v4, 10 +; SI-NEXT: v_readlane_b32 s51, v4, 9 +; SI-NEXT: v_readlane_b32 s50, v4, 8 +; SI-NEXT: v_readlane_b32 s49, v4, 7 +; SI-NEXT: v_readlane_b32 s48, v4, 6 +; SI-NEXT: v_readlane_b32 s39, v4, 5 +; SI-NEXT: v_readlane_b32 s38, v4, 4 +; SI-NEXT: v_readlane_b32 s37, v4, 3 +; SI-NEXT: v_readlane_b32 s36, v4, 2 +; SI-NEXT: v_readlane_b32 s35, v4, 1 +; SI-NEXT: v_readlane_b32 s34, v4, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -10154,30 +10154,30 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 
; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB25_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -10485,27 +10485,27 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; 
VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -10568,26 +10568,26 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX9-NEXT: 
s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 +; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB25_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -10880,23 +10880,23 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: 
v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -10959,17 +10959,17 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 +; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, 
s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 ; GFX11-NEXT: s_cbranch_scc0 .LBB25_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -11240,20 +11240,20 @@ define inreg <64 x i8> @bitcast_v16i32_to_v64i8_scalar(<16 x i32> inreg %a, i32 ; GFX11-NEXT: s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: 
s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -21564,8 +21564,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -21883,8 +21883,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB47_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -21898,8 +21898,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -22234,8 +22234,8 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB47_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, 
s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -22953,8 +22953,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v16f32_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22971,6 +22969,8 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -23372,10 +23372,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v16f32_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -23392,6 +23388,10 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword 
v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -23703,10 +23703,6 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v16f32_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -23723,6 +23719,10 @@ define <64 x i8> @bitcast_v16f32_to_v64i8(<16 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -24582,40 +24582,40 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, 
s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 
26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s30, 28 +; SI-NEXT: v_writelane_b32 v40, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v40, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s36, v1 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s37, v2 -; SI-NEXT: v_writelane_b32 v40, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB49_3 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s82, s37, 24 @@ -25030,37 +25030,37 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; SI-NEXT: v_or_b32_e32 v2, v3, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 
s31, v40, 29 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -25073,30 +25073,6 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: 
v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -25112,6 +25088,30 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, 
v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB49_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -25440,26 +25440,26 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; VI-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: 
v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; VI-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -25517,26 +25517,6 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -25552,6 +25532,26 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword 
v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB49_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -25873,22 +25873,22 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 
-; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -25942,18 +25942,18 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: 
v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB49_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -26301,21 +26301,21 @@ define inreg <64 x i8> @bitcast_v16f32_to_v64i8_scalar(<16 x float> inreg %a, i3 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: v_or_b32_e32 v3, v11, v7 ; GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -35963,8 +35963,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -36282,8 +36282,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB67_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -36297,8 +36297,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -36633,8 +36633,8 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB67_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -37352,8 +37352,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v8i64_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], 
s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37370,6 +37368,8 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -37771,10 +37771,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v8i64_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37791,6 +37787,10 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -38102,10 +38102,6 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v8i64_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -38122,6 +38118,10 @@ define <64 x i8> @bitcast_v8i64_to_v64i8(<8 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -39007,40 +39007,40 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; SI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v4, s30, 0 -; SI-NEXT: v_writelane_b32 v4, s31, 1 -; SI-NEXT: v_writelane_b32 v4, s34, 2 -; SI-NEXT: v_writelane_b32 v4, s35, 3 -; SI-NEXT: v_writelane_b32 v4, s36, 4 -; SI-NEXT: v_writelane_b32 v4, s37, 5 -; SI-NEXT: v_writelane_b32 v4, s38, 6 -; SI-NEXT: v_writelane_b32 v4, s39, 7 -; SI-NEXT: v_writelane_b32 v4, s48, 8 -; SI-NEXT: v_writelane_b32 v4, s49, 9 -; SI-NEXT: v_writelane_b32 v4, s50, 10 -; SI-NEXT: v_writelane_b32 v4, s51, 11 -; SI-NEXT: v_writelane_b32 v4, s52, 12 -; SI-NEXT: v_writelane_b32 v4, s53, 13 -; SI-NEXT: v_writelane_b32 v4, s54, 14 -; SI-NEXT: v_writelane_b32 v4, s55, 15 -; SI-NEXT: v_writelane_b32 v4, s64, 16 -; SI-NEXT: v_writelane_b32 v4, s65, 17 -; SI-NEXT: 
v_writelane_b32 v4, s66, 18 -; SI-NEXT: v_writelane_b32 v4, s67, 19 -; SI-NEXT: v_writelane_b32 v4, s68, 20 -; SI-NEXT: v_writelane_b32 v4, s69, 21 -; SI-NEXT: v_writelane_b32 v4, s70, 22 -; SI-NEXT: v_writelane_b32 v4, s71, 23 -; SI-NEXT: v_writelane_b32 v4, s80, 24 -; SI-NEXT: v_writelane_b32 v4, s81, 25 -; SI-NEXT: v_writelane_b32 v4, s82, 26 -; SI-NEXT: v_writelane_b32 v4, s83, 27 +; SI-NEXT: v_writelane_b32 v4, s34, 0 +; SI-NEXT: v_writelane_b32 v4, s35, 1 +; SI-NEXT: v_writelane_b32 v4, s36, 2 +; SI-NEXT: v_writelane_b32 v4, s37, 3 +; SI-NEXT: v_writelane_b32 v4, s38, 4 +; SI-NEXT: v_writelane_b32 v4, s39, 5 +; SI-NEXT: v_writelane_b32 v4, s48, 6 +; SI-NEXT: v_writelane_b32 v4, s49, 7 +; SI-NEXT: v_writelane_b32 v4, s50, 8 +; SI-NEXT: v_writelane_b32 v4, s51, 9 +; SI-NEXT: v_writelane_b32 v4, s52, 10 +; SI-NEXT: v_writelane_b32 v4, s53, 11 +; SI-NEXT: v_writelane_b32 v4, s54, 12 +; SI-NEXT: v_writelane_b32 v4, s55, 13 +; SI-NEXT: v_writelane_b32 v4, s64, 14 +; SI-NEXT: v_writelane_b32 v4, s65, 15 +; SI-NEXT: v_writelane_b32 v4, s66, 16 +; SI-NEXT: v_writelane_b32 v4, s67, 17 +; SI-NEXT: v_writelane_b32 v4, s68, 18 +; SI-NEXT: v_writelane_b32 v4, s69, 19 +; SI-NEXT: v_writelane_b32 v4, s70, 20 +; SI-NEXT: v_writelane_b32 v4, s71, 21 +; SI-NEXT: v_writelane_b32 v4, s80, 22 +; SI-NEXT: v_writelane_b32 v4, s81, 23 +; SI-NEXT: v_writelane_b32 v4, s82, 24 +; SI-NEXT: v_writelane_b32 v4, s83, 25 +; SI-NEXT: v_writelane_b32 v4, s84, 26 +; SI-NEXT: v_writelane_b32 v4, s85, 27 +; SI-NEXT: v_writelane_b32 v4, s30, 28 +; SI-NEXT: v_writelane_b32 v4, s31, 29 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v4, s84, 28 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v4, s85, 29 ; SI-NEXT: s_cbranch_scc0 .LBB69_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s38, s5, 24 @@ -39361,37 +39361,37 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x 
i64> inreg %a, i32 in ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v4, 28 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s85, v4, 29 -; SI-NEXT: v_readlane_b32 s84, v4, 28 -; SI-NEXT: v_readlane_b32 s83, v4, 27 -; SI-NEXT: v_readlane_b32 s82, v4, 26 -; SI-NEXT: v_readlane_b32 s81, v4, 25 -; SI-NEXT: v_readlane_b32 s80, v4, 24 -; SI-NEXT: v_readlane_b32 s71, v4, 23 -; SI-NEXT: v_readlane_b32 s70, v4, 22 -; SI-NEXT: v_readlane_b32 s69, v4, 21 -; SI-NEXT: v_readlane_b32 s68, v4, 20 -; SI-NEXT: v_readlane_b32 s67, v4, 19 -; SI-NEXT: v_readlane_b32 s66, v4, 18 -; SI-NEXT: v_readlane_b32 s65, v4, 17 -; SI-NEXT: v_readlane_b32 s64, v4, 16 -; SI-NEXT: v_readlane_b32 s55, v4, 15 -; SI-NEXT: v_readlane_b32 s54, v4, 14 -; SI-NEXT: v_readlane_b32 s53, v4, 13 -; SI-NEXT: v_readlane_b32 s52, v4, 12 -; SI-NEXT: v_readlane_b32 s51, v4, 11 -; SI-NEXT: v_readlane_b32 s50, v4, 10 -; SI-NEXT: v_readlane_b32 s49, v4, 9 -; SI-NEXT: v_readlane_b32 s48, v4, 8 -; SI-NEXT: v_readlane_b32 s39, v4, 7 -; SI-NEXT: v_readlane_b32 s38, v4, 6 -; SI-NEXT: v_readlane_b32 s37, v4, 5 -; SI-NEXT: v_readlane_b32 s36, v4, 4 -; SI-NEXT: v_readlane_b32 s35, v4, 3 -; SI-NEXT: v_readlane_b32 s34, v4, 2 -; SI-NEXT: v_readlane_b32 s31, v4, 1 -; SI-NEXT: v_readlane_b32 s30, v4, 0 +; SI-NEXT: v_readlane_b32 s31, v4, 29 +; SI-NEXT: v_readlane_b32 s85, v4, 27 +; SI-NEXT: v_readlane_b32 s84, v4, 26 +; SI-NEXT: v_readlane_b32 s83, v4, 25 +; SI-NEXT: v_readlane_b32 s82, v4, 24 +; SI-NEXT: v_readlane_b32 s81, v4, 23 +; SI-NEXT: v_readlane_b32 s80, v4, 22 +; SI-NEXT: v_readlane_b32 s71, v4, 21 +; SI-NEXT: v_readlane_b32 s70, v4, 20 +; SI-NEXT: v_readlane_b32 s69, v4, 19 +; SI-NEXT: v_readlane_b32 s68, v4, 18 +; SI-NEXT: v_readlane_b32 s67, v4, 17 +; SI-NEXT: v_readlane_b32 s66, v4, 16 +; SI-NEXT: v_readlane_b32 s65, v4, 15 +; SI-NEXT: v_readlane_b32 s64, v4, 14 +; SI-NEXT: 
v_readlane_b32 s55, v4, 13 +; SI-NEXT: v_readlane_b32 s54, v4, 12 +; SI-NEXT: v_readlane_b32 s53, v4, 11 +; SI-NEXT: v_readlane_b32 s52, v4, 10 +; SI-NEXT: v_readlane_b32 s51, v4, 9 +; SI-NEXT: v_readlane_b32 s50, v4, 8 +; SI-NEXT: v_readlane_b32 s49, v4, 7 +; SI-NEXT: v_readlane_b32 s48, v4, 6 +; SI-NEXT: v_readlane_b32 s39, v4, 5 +; SI-NEXT: v_readlane_b32 s38, v4, 4 +; SI-NEXT: v_readlane_b32 s37, v4, 3 +; SI-NEXT: v_readlane_b32 s36, v4, 2 +; SI-NEXT: v_readlane_b32 s35, v4, 1 +; SI-NEXT: v_readlane_b32 s34, v4, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -39454,30 +39454,30 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: 
v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB69_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -39785,27 +39785,27 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: 
v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -39868,26 +39868,26 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 
+; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB69_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -40180,23 +40180,23 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, 
v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -40259,17 +40259,17 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 +; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s30, 7 +; GFX11-NEXT: v_writelane_b32 v17, s31, 8 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 ; GFX11-NEXT: s_cbranch_scc0 .LBB69_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -40540,20 +40540,20 @@ define inreg <64 x i8> @bitcast_v8i64_to_v64i8_scalar(<8 x i64> inreg %a, i32 in ; GFX11-NEXT: s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 7 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off 
offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 8 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -49422,8 +49422,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v19, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v19, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -49741,8 +49741,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB83_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: v_readlane_b32 s30, v19, 0 +; VI-NEXT: v_readlane_b32 s31, v19, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -49756,8 +49756,8 @@ define inreg <8 x double> 
@bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -50092,8 +50092,8 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB83_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -50811,8 +50811,6 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v8f64_to_v64i8: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -50829,6 +50827,8 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: ; implicit-def: $vgpr26 ; SI-NEXT: ; implicit-def: $vgpr58 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr47 @@ -51222,10 +51222,6 
@@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v8f64_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -51242,6 +51238,10 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -51545,10 +51545,6 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v8f64_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -51565,6 +51561,10 @@ define <64 x i8> @bitcast_v8f64_to_v64i8(<8 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, 
s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -52416,42 +52416,42 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 -; SI-NEXT: v_writelane_b32 v40, s36, 4 -; SI-NEXT: v_writelane_b32 v40, s37, 5 -; SI-NEXT: v_writelane_b32 v40, s38, 6 -; SI-NEXT: v_writelane_b32 v40, s39, 7 -; SI-NEXT: v_writelane_b32 v40, s48, 8 -; SI-NEXT: v_writelane_b32 v40, s49, 9 -; SI-NEXT: v_writelane_b32 v40, s50, 10 -; SI-NEXT: v_writelane_b32 v40, s51, 11 -; SI-NEXT: v_writelane_b32 v40, s52, 12 -; SI-NEXT: v_writelane_b32 v40, s53, 13 -; SI-NEXT: v_writelane_b32 v40, s54, 14 -; SI-NEXT: v_writelane_b32 v40, s55, 15 -; SI-NEXT: v_writelane_b32 v40, s64, 16 -; SI-NEXT: v_writelane_b32 v40, s65, 17 -; SI-NEXT: v_writelane_b32 v40, s66, 18 -; SI-NEXT: v_writelane_b32 v40, s67, 19 -; SI-NEXT: v_writelane_b32 v40, s68, 20 -; SI-NEXT: v_writelane_b32 v40, s69, 21 -; SI-NEXT: v_writelane_b32 v40, s70, 22 -; SI-NEXT: v_writelane_b32 v40, s71, 23 -; SI-NEXT: v_writelane_b32 v40, s80, 24 -; SI-NEXT: v_writelane_b32 v40, s81, 25 -; SI-NEXT: v_writelane_b32 v40, s82, 26 -; SI-NEXT: v_writelane_b32 v40, s83, 27 -; SI-NEXT: v_writelane_b32 v40, s84, 28 -; SI-NEXT: v_writelane_b32 v40, s85, 29 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 
v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s38, 4 +; SI-NEXT: v_writelane_b32 v40, s39, 5 +; SI-NEXT: v_writelane_b32 v40, s48, 6 +; SI-NEXT: v_writelane_b32 v40, s49, 7 +; SI-NEXT: v_writelane_b32 v40, s50, 8 +; SI-NEXT: v_writelane_b32 v40, s51, 9 +; SI-NEXT: v_writelane_b32 v40, s52, 10 +; SI-NEXT: v_writelane_b32 v40, s53, 11 +; SI-NEXT: v_writelane_b32 v40, s54, 12 +; SI-NEXT: v_writelane_b32 v40, s55, 13 +; SI-NEXT: v_writelane_b32 v40, s64, 14 +; SI-NEXT: v_writelane_b32 v40, s65, 15 +; SI-NEXT: v_writelane_b32 v40, s66, 16 +; SI-NEXT: v_writelane_b32 v40, s67, 17 +; SI-NEXT: v_writelane_b32 v40, s68, 18 +; SI-NEXT: v_writelane_b32 v40, s69, 19 +; SI-NEXT: v_writelane_b32 v40, s70, 20 +; SI-NEXT: v_writelane_b32 v40, s71, 21 +; SI-NEXT: v_writelane_b32 v40, s80, 22 +; SI-NEXT: v_writelane_b32 v40, s81, 23 +; SI-NEXT: v_writelane_b32 v40, s82, 24 +; SI-NEXT: v_writelane_b32 v40, s83, 25 +; SI-NEXT: v_writelane_b32 v40, s84, 26 +; SI-NEXT: v_writelane_b32 v40, s85, 27 +; SI-NEXT: v_writelane_b32 v40, s86, 28 +; SI-NEXT: v_writelane_b32 v40, s87, 29 +; SI-NEXT: v_writelane_b32 v40, s30, 30 +; SI-NEXT: v_writelane_b32 v40, s31, 31 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; SI-NEXT: v_writelane_b32 v40, s86, 30 ; SI-NEXT: v_readfirstlane_b32 s4, v1 ; SI-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v2 -; SI-NEXT: v_writelane_b32 v40, s87, 31 ; SI-NEXT: s_cbranch_scc0 .LBB85_3 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s48, s5, 24 @@ -52850,39 +52850,39 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v40, 30 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s87, v40, 31 -; SI-NEXT: v_readlane_b32 s86, v40, 30 -; SI-NEXT: v_readlane_b32 s85, v40, 29 -; SI-NEXT: v_readlane_b32 s84, v40, 28 -; SI-NEXT: v_readlane_b32 s83, v40, 27 -; 
SI-NEXT: v_readlane_b32 s82, v40, 26 -; SI-NEXT: v_readlane_b32 s81, v40, 25 -; SI-NEXT: v_readlane_b32 s80, v40, 24 -; SI-NEXT: v_readlane_b32 s71, v40, 23 -; SI-NEXT: v_readlane_b32 s70, v40, 22 -; SI-NEXT: v_readlane_b32 s69, v40, 21 -; SI-NEXT: v_readlane_b32 s68, v40, 20 -; SI-NEXT: v_readlane_b32 s67, v40, 19 -; SI-NEXT: v_readlane_b32 s66, v40, 18 -; SI-NEXT: v_readlane_b32 s65, v40, 17 -; SI-NEXT: v_readlane_b32 s64, v40, 16 -; SI-NEXT: v_readlane_b32 s55, v40, 15 -; SI-NEXT: v_readlane_b32 s54, v40, 14 -; SI-NEXT: v_readlane_b32 s53, v40, 13 -; SI-NEXT: v_readlane_b32 s52, v40, 12 -; SI-NEXT: v_readlane_b32 s51, v40, 11 -; SI-NEXT: v_readlane_b32 s50, v40, 10 -; SI-NEXT: v_readlane_b32 s49, v40, 9 -; SI-NEXT: v_readlane_b32 s48, v40, 8 -; SI-NEXT: v_readlane_b32 s39, v40, 7 -; SI-NEXT: v_readlane_b32 s38, v40, 6 -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 31 +; SI-NEXT: v_readlane_b32 s87, v40, 29 +; SI-NEXT: v_readlane_b32 s86, v40, 28 +; SI-NEXT: v_readlane_b32 s85, v40, 27 +; SI-NEXT: v_readlane_b32 s84, v40, 26 +; SI-NEXT: v_readlane_b32 s83, v40, 25 +; SI-NEXT: v_readlane_b32 s82, v40, 24 +; SI-NEXT: v_readlane_b32 s81, v40, 23 +; SI-NEXT: v_readlane_b32 s80, v40, 22 +; SI-NEXT: v_readlane_b32 s71, v40, 21 +; SI-NEXT: v_readlane_b32 s70, v40, 20 +; SI-NEXT: v_readlane_b32 s69, v40, 19 +; SI-NEXT: v_readlane_b32 s68, v40, 18 +; SI-NEXT: v_readlane_b32 s67, v40, 17 +; SI-NEXT: v_readlane_b32 s66, v40, 16 +; SI-NEXT: v_readlane_b32 s65, v40, 15 +; SI-NEXT: v_readlane_b32 s64, v40, 14 +; SI-NEXT: v_readlane_b32 s55, v40, 13 +; SI-NEXT: v_readlane_b32 s54, v40, 12 +; SI-NEXT: v_readlane_b32 s53, v40, 11 +; SI-NEXT: v_readlane_b32 s52, v40, 10 +; SI-NEXT: v_readlane_b32 s51, v40, 9 +; SI-NEXT: v_readlane_b32 s50, v40, 8 +; SI-NEXT: 
v_readlane_b32 s49, v40, 7 +; SI-NEXT: v_readlane_b32 s48, v40, 6 +; SI-NEXT: v_readlane_b32 s39, v40, 5 +; SI-NEXT: v_readlane_b32 s38, v40, 4 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -52895,30 +52895,30 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v40, s30, 0 -; VI-NEXT: v_writelane_b32 v40, s31, 1 -; VI-NEXT: v_writelane_b32 v40, s34, 2 -; VI-NEXT: v_writelane_b32 v40, s35, 3 -; VI-NEXT: v_writelane_b32 v40, s36, 4 -; VI-NEXT: v_writelane_b32 v40, s37, 5 -; VI-NEXT: v_writelane_b32 v40, s38, 6 -; VI-NEXT: v_writelane_b32 v40, s39, 7 -; VI-NEXT: v_writelane_b32 v40, s48, 8 -; VI-NEXT: v_writelane_b32 v40, s49, 9 -; VI-NEXT: v_writelane_b32 v40, s50, 10 -; VI-NEXT: v_writelane_b32 v40, s51, 11 -; VI-NEXT: v_writelane_b32 v40, s52, 12 -; VI-NEXT: v_writelane_b32 v40, s53, 13 -; VI-NEXT: v_writelane_b32 v40, s54, 14 -; VI-NEXT: v_writelane_b32 v40, s55, 15 -; VI-NEXT: v_writelane_b32 v40, s64, 16 -; VI-NEXT: v_writelane_b32 v40, s65, 17 +; VI-NEXT: v_writelane_b32 v40, s34, 0 +; VI-NEXT: v_writelane_b32 v40, s35, 1 +; VI-NEXT: v_writelane_b32 v40, s36, 2 +; VI-NEXT: v_writelane_b32 v40, s37, 3 +; VI-NEXT: v_writelane_b32 v40, s38, 4 +; VI-NEXT: v_writelane_b32 v40, s39, 5 +; VI-NEXT: v_writelane_b32 v40, s48, 6 +; VI-NEXT: v_writelane_b32 v40, s49, 7 +; VI-NEXT: v_writelane_b32 v40, s50, 8 +; VI-NEXT: v_writelane_b32 v40, s51, 9 +; VI-NEXT: v_writelane_b32 v40, s52, 10 +; VI-NEXT: v_writelane_b32 v40, s53, 11 +; VI-NEXT: v_writelane_b32 v40, s54, 12 +; VI-NEXT: v_writelane_b32 v40, s55, 13 +; 
VI-NEXT: v_writelane_b32 v40, s64, 14 +; VI-NEXT: v_writelane_b32 v40, s65, 15 +; VI-NEXT: v_writelane_b32 v40, s66, 16 +; VI-NEXT: v_writelane_b32 v40, s67, 17 +; VI-NEXT: v_writelane_b32 v40, s30, 18 +; VI-NEXT: v_writelane_b32 v40, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v40, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v40, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB85_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -53270,27 +53270,27 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v40, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v40, 19 -; VI-NEXT: v_readlane_b32 s66, v40, 18 -; VI-NEXT: v_readlane_b32 s65, v40, 17 -; VI-NEXT: v_readlane_b32 s64, v40, 16 -; VI-NEXT: v_readlane_b32 s55, v40, 15 -; VI-NEXT: v_readlane_b32 s54, v40, 14 -; VI-NEXT: v_readlane_b32 s53, v40, 13 -; VI-NEXT: v_readlane_b32 s52, v40, 12 -; VI-NEXT: v_readlane_b32 s51, v40, 11 -; VI-NEXT: v_readlane_b32 s50, v40, 10 -; VI-NEXT: v_readlane_b32 s49, v40, 9 -; VI-NEXT: v_readlane_b32 s48, v40, 8 -; VI-NEXT: v_readlane_b32 s39, v40, 7 -; VI-NEXT: v_readlane_b32 s38, v40, 6 -; VI-NEXT: v_readlane_b32 s37, v40, 5 -; VI-NEXT: v_readlane_b32 s36, v40, 4 -; VI-NEXT: v_readlane_b32 s35, v40, 3 -; VI-NEXT: v_readlane_b32 s34, v40, 2 -; VI-NEXT: v_readlane_b32 s31, v40, 1 -; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 19 +; VI-NEXT: v_readlane_b32 s67, v40, 17 +; VI-NEXT: v_readlane_b32 s66, v40, 16 +; VI-NEXT: v_readlane_b32 s65, v40, 15 +; VI-NEXT: v_readlane_b32 s64, v40, 14 +; VI-NEXT: v_readlane_b32 s55, v40, 13 +; VI-NEXT: v_readlane_b32 s54, v40, 12 +; VI-NEXT: 
v_readlane_b32 s53, v40, 11 +; VI-NEXT: v_readlane_b32 s52, v40, 10 +; VI-NEXT: v_readlane_b32 s51, v40, 9 +; VI-NEXT: v_readlane_b32 s50, v40, 8 +; VI-NEXT: v_readlane_b32 s49, v40, 7 +; VI-NEXT: v_readlane_b32 s48, v40, 6 +; VI-NEXT: v_readlane_b32 s39, v40, 5 +; VI-NEXT: v_readlane_b32 s38, v40, 4 +; VI-NEXT: v_readlane_b32 s37, v40, 3 +; VI-NEXT: v_readlane_b32 s36, v40, 2 +; VI-NEXT: v_readlane_b32 s35, v40, 1 +; VI-NEXT: v_readlane_b32 s34, v40, 0 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -53303,26 +53303,26 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, 
s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s30, 14 +; GFX9-NEXT: v_writelane_b32 v40, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB85_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -53659,23 +53659,23 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 15 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 
+; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -53688,18 +53688,18 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v33, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v33, s30, 0 +; GFX11-NEXT: v_writelane_b32 v33, s34, 0 +; GFX11-NEXT: v_writelane_b32 v33, s35, 1 +; GFX11-NEXT: v_writelane_b32 v33, s36, 2 +; GFX11-NEXT: v_writelane_b32 v33, s37, 3 +; GFX11-NEXT: v_writelane_b32 v33, s38, 4 +; GFX11-NEXT: v_writelane_b32 v33, s39, 5 +; GFX11-NEXT: v_writelane_b32 v33, s48, 6 +; GFX11-NEXT: v_writelane_b32 v33, s49, 7 +; GFX11-NEXT: v_writelane_b32 v33, s30, 8 +; GFX11-NEXT: v_writelane_b32 v33, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s90, 0 -; GFX11-NEXT: v_writelane_b32 v33, s31, 1 -; GFX11-NEXT: v_writelane_b32 v33, s34, 2 -; GFX11-NEXT: v_writelane_b32 v33, s35, 3 -; GFX11-NEXT: v_writelane_b32 v33, s36, 4 -; GFX11-NEXT: v_writelane_b32 v33, s37, 5 -; GFX11-NEXT: v_writelane_b32 v33, s38, 6 -; GFX11-NEXT: v_writelane_b32 v33, s39, 7 -; GFX11-NEXT: v_writelane_b32 v33, s48, 8 -; GFX11-NEXT: v_writelane_b32 v33, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB85_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s42, s27, 24 @@ -54037,21 +54037,21 @@ define inreg <64 x i8> @bitcast_v8f64_to_v64i8_scalar(<8 x double> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, s0 ; GFX11-NEXT: v_mov_b32_e32 v4, s1 +; GFX11-NEXT: v_readlane_b32 s30, v33, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off ; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, 
v[10:13], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v33, 9 -; GFX11-NEXT: v_readlane_b32 s48, v33, 8 -; GFX11-NEXT: v_readlane_b32 s39, v33, 7 -; GFX11-NEXT: v_readlane_b32 s38, v33, 6 -; GFX11-NEXT: v_readlane_b32 s37, v33, 5 -; GFX11-NEXT: v_readlane_b32 s36, v33, 4 -; GFX11-NEXT: v_readlane_b32 s35, v33, 3 -; GFX11-NEXT: v_readlane_b32 s34, v33, 2 -; GFX11-NEXT: v_readlane_b32 s31, v33, 1 -; GFX11-NEXT: v_readlane_b32 s30, v33, 0 +; GFX11-NEXT: v_readlane_b32 s31, v33, 9 +; GFX11-NEXT: v_readlane_b32 s49, v33, 7 +; GFX11-NEXT: v_readlane_b32 s48, v33, 6 +; GFX11-NEXT: v_readlane_b32 s39, v33, 5 +; GFX11-NEXT: v_readlane_b32 s38, v33, 4 +; GFX11-NEXT: v_readlane_b32 s37, v33, 3 +; GFX11-NEXT: v_readlane_b32 s36, v33, 2 +; GFX11-NEXT: v_readlane_b32 s35, v33, 1 +; GFX11-NEXT: v_readlane_b32 s34, v33, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v33, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -58733,9 +58733,9 @@ define inreg <32 x half> @bitcast_v32i16_to_v32f16_scalar(<32 x i16> inreg %a, i ; SI-LABEL: bitcast_v32i16_to_v32f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: v_mov_b32_e32 v54, v17 ; SI-NEXT: v_mov_b32_e32 v53, v16 ; SI-NEXT: v_mov_b32_e32 v52, v15 @@ -61978,7 +61978,6 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; SI-LABEL: bitcast_v32bf16_to_v32i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte 
Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -61995,6 +61994,7 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_waitcnt expcnt(6) ; SI-NEXT: v_mul_f32_e64 v57, 1.0, s16 @@ -62247,8 +62247,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v20, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -62566,8 +62566,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB95_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -62581,8 +62581,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 
s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -62901,8 +62901,8 @@ define inreg <32 x i16> @bitcast_v32bf16_to_v32i16_scalar(<32 x bfloat> inreg %a ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB95_5: ; %end -; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -64359,8 +64359,24 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; ; VI-LABEL: bitcast_v32i16_to_v64i8: ; VI: ; %bb.0: -; VI-NEXT: ; implicit-def: $vgpr19 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword 
v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: ; implicit-def: $vgpr19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr19 @@ -64381,22 +64397,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr19 ; VI-NEXT: ; implicit-def: $vgpr19 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; kill: killed $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr17 @@ -64829,10 +64829,6 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> 
%a, i32 %b) { ; GFX9-LABEL: bitcast_v32i16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -64849,6 +64845,10 @@ define <64 x i8> @bitcast_v32i16_to_v64i8(<32 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -65725,43 +65725,43 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_writelane_b32 v20, s30, 0 -; SI-NEXT: v_writelane_b32 v20, s31, 1 -; SI-NEXT: v_writelane_b32 v20, s34, 2 -; SI-NEXT: v_writelane_b32 v20, s35, 3 -; SI-NEXT: v_writelane_b32 v20, s36, 4 -; SI-NEXT: v_writelane_b32 v20, s37, 5 -; SI-NEXT: v_writelane_b32 v20, s38, 6 -; SI-NEXT: v_writelane_b32 v20, s39, 7 -; SI-NEXT: v_writelane_b32 v20, s48, 8 -; SI-NEXT: v_writelane_b32 v20, s49, 9 -; SI-NEXT: v_writelane_b32 v20, s50, 10 -; SI-NEXT: v_writelane_b32 v20, s51, 11 -; SI-NEXT: v_writelane_b32 v20, s52, 12 -; SI-NEXT: v_writelane_b32 v20, s53, 13 -; SI-NEXT: v_writelane_b32 v20, s54, 
14 -; SI-NEXT: v_writelane_b32 v20, s55, 15 -; SI-NEXT: v_writelane_b32 v20, s64, 16 -; SI-NEXT: v_writelane_b32 v20, s65, 17 -; SI-NEXT: v_writelane_b32 v20, s66, 18 -; SI-NEXT: v_writelane_b32 v20, s67, 19 -; SI-NEXT: v_writelane_b32 v20, s68, 20 -; SI-NEXT: v_writelane_b32 v20, s69, 21 -; SI-NEXT: v_writelane_b32 v20, s70, 22 -; SI-NEXT: v_writelane_b32 v20, s71, 23 -; SI-NEXT: v_writelane_b32 v20, s80, 24 -; SI-NEXT: v_writelane_b32 v20, s81, 25 -; SI-NEXT: v_writelane_b32 v20, s82, 26 -; SI-NEXT: v_writelane_b32 v20, s83, 27 -; SI-NEXT: v_writelane_b32 v20, s84, 28 -; SI-NEXT: v_writelane_b32 v20, s85, 29 -; SI-NEXT: v_writelane_b32 v20, s86, 30 -; SI-NEXT: v_writelane_b32 v20, s87, 31 -; SI-NEXT: v_writelane_b32 v20, s96, 32 -; SI-NEXT: v_writelane_b32 v20, s97, 33 -; SI-NEXT: v_writelane_b32 v20, s98, 34 +; SI-NEXT: v_writelane_b32 v20, s34, 0 +; SI-NEXT: v_writelane_b32 v20, s35, 1 +; SI-NEXT: v_writelane_b32 v20, s36, 2 +; SI-NEXT: v_writelane_b32 v20, s37, 3 +; SI-NEXT: v_writelane_b32 v20, s38, 4 +; SI-NEXT: v_writelane_b32 v20, s39, 5 +; SI-NEXT: v_writelane_b32 v20, s48, 6 +; SI-NEXT: v_writelane_b32 v20, s49, 7 +; SI-NEXT: v_writelane_b32 v20, s50, 8 +; SI-NEXT: v_writelane_b32 v20, s51, 9 +; SI-NEXT: v_writelane_b32 v20, s52, 10 +; SI-NEXT: v_writelane_b32 v20, s53, 11 +; SI-NEXT: v_writelane_b32 v20, s54, 12 +; SI-NEXT: v_writelane_b32 v20, s55, 13 +; SI-NEXT: v_writelane_b32 v20, s64, 14 +; SI-NEXT: v_writelane_b32 v20, s65, 15 +; SI-NEXT: v_writelane_b32 v20, s66, 16 +; SI-NEXT: v_writelane_b32 v20, s67, 17 +; SI-NEXT: v_writelane_b32 v20, s68, 18 +; SI-NEXT: v_writelane_b32 v20, s69, 19 +; SI-NEXT: v_writelane_b32 v20, s70, 20 +; SI-NEXT: v_writelane_b32 v20, s71, 21 +; SI-NEXT: v_writelane_b32 v20, s80, 22 +; SI-NEXT: v_writelane_b32 v20, s81, 23 +; SI-NEXT: v_writelane_b32 v20, s82, 24 +; SI-NEXT: v_writelane_b32 v20, s83, 25 +; SI-NEXT: v_writelane_b32 v20, s84, 26 +; SI-NEXT: v_writelane_b32 v20, s85, 27 +; SI-NEXT: v_writelane_b32 v20, s86, 
28 +; SI-NEXT: v_writelane_b32 v20, s87, 29 +; SI-NEXT: v_writelane_b32 v20, s96, 30 +; SI-NEXT: v_writelane_b32 v20, s97, 31 +; SI-NEXT: v_writelane_b32 v20, s98, 32 +; SI-NEXT: v_writelane_b32 v20, s99, 33 +; SI-NEXT: v_writelane_b32 v20, s30, 34 +; SI-NEXT: v_writelane_b32 v20, s31, 35 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v20, s99, 35 ; SI-NEXT: s_mov_b32 s93, s18 ; SI-NEXT: s_mov_b32 s31, s17 ; SI-NEXT: v_readfirstlane_b32 s59, v18 @@ -66280,45 +66280,45 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v20, 34 ; SI-NEXT: v_readlane_b32 s19, v21, 11 ; SI-NEXT: v_readlane_b32 s17, v21, 17 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s99, v20, 35 -; SI-NEXT: v_readlane_b32 s98, v20, 34 -; SI-NEXT: v_readlane_b32 s97, v20, 33 -; SI-NEXT: v_readlane_b32 s96, v20, 32 -; SI-NEXT: v_readlane_b32 s87, v20, 31 -; SI-NEXT: v_readlane_b32 s86, v20, 30 -; SI-NEXT: v_readlane_b32 s85, v20, 29 -; SI-NEXT: v_readlane_b32 s84, v20, 28 -; SI-NEXT: v_readlane_b32 s83, v20, 27 -; SI-NEXT: v_readlane_b32 s82, v20, 26 -; SI-NEXT: v_readlane_b32 s81, v20, 25 -; SI-NEXT: v_readlane_b32 s80, v20, 24 -; SI-NEXT: v_readlane_b32 s71, v20, 23 -; SI-NEXT: v_readlane_b32 s70, v20, 22 -; SI-NEXT: v_readlane_b32 s69, v20, 21 -; SI-NEXT: v_readlane_b32 s68, v20, 20 -; SI-NEXT: v_readlane_b32 s67, v20, 19 -; SI-NEXT: v_readlane_b32 s66, v20, 18 -; SI-NEXT: v_readlane_b32 s65, v20, 17 -; SI-NEXT: v_readlane_b32 s64, v20, 16 -; SI-NEXT: v_readlane_b32 s55, v20, 15 -; SI-NEXT: v_readlane_b32 s54, v20, 14 -; SI-NEXT: v_readlane_b32 s53, v20, 13 -; SI-NEXT: v_readlane_b32 s52, v20, 12 -; SI-NEXT: v_readlane_b32 s51, v20, 11 -; SI-NEXT: v_readlane_b32 s50, v20, 10 -; SI-NEXT: v_readlane_b32 s49, v20, 9 -; SI-NEXT: v_readlane_b32 s48, v20, 8 -; 
SI-NEXT: v_readlane_b32 s39, v20, 7 -; SI-NEXT: v_readlane_b32 s38, v20, 6 -; SI-NEXT: v_readlane_b32 s37, v20, 5 -; SI-NEXT: v_readlane_b32 s36, v20, 4 -; SI-NEXT: v_readlane_b32 s35, v20, 3 -; SI-NEXT: v_readlane_b32 s34, v20, 2 -; SI-NEXT: v_readlane_b32 s31, v20, 1 -; SI-NEXT: v_readlane_b32 s30, v20, 0 +; SI-NEXT: v_readlane_b32 s31, v20, 35 +; SI-NEXT: v_readlane_b32 s99, v20, 33 +; SI-NEXT: v_readlane_b32 s98, v20, 32 +; SI-NEXT: v_readlane_b32 s97, v20, 31 +; SI-NEXT: v_readlane_b32 s96, v20, 30 +; SI-NEXT: v_readlane_b32 s87, v20, 29 +; SI-NEXT: v_readlane_b32 s86, v20, 28 +; SI-NEXT: v_readlane_b32 s85, v20, 27 +; SI-NEXT: v_readlane_b32 s84, v20, 26 +; SI-NEXT: v_readlane_b32 s83, v20, 25 +; SI-NEXT: v_readlane_b32 s82, v20, 24 +; SI-NEXT: v_readlane_b32 s81, v20, 23 +; SI-NEXT: v_readlane_b32 s80, v20, 22 +; SI-NEXT: v_readlane_b32 s71, v20, 21 +; SI-NEXT: v_readlane_b32 s70, v20, 20 +; SI-NEXT: v_readlane_b32 s69, v20, 19 +; SI-NEXT: v_readlane_b32 s68, v20, 18 +; SI-NEXT: v_readlane_b32 s67, v20, 17 +; SI-NEXT: v_readlane_b32 s66, v20, 16 +; SI-NEXT: v_readlane_b32 s65, v20, 15 +; SI-NEXT: v_readlane_b32 s64, v20, 14 +; SI-NEXT: v_readlane_b32 s55, v20, 13 +; SI-NEXT: v_readlane_b32 s54, v20, 12 +; SI-NEXT: v_readlane_b32 s53, v20, 11 +; SI-NEXT: v_readlane_b32 s52, v20, 10 +; SI-NEXT: v_readlane_b32 s51, v20, 9 +; SI-NEXT: v_readlane_b32 s50, v20, 8 +; SI-NEXT: v_readlane_b32 s49, v20, 7 +; SI-NEXT: v_readlane_b32 s48, v20, 6 +; SI-NEXT: v_readlane_b32 s39, v20, 5 +; SI-NEXT: v_readlane_b32 s38, v20, 4 +; SI-NEXT: v_readlane_b32 s37, v20, 3 +; SI-NEXT: v_readlane_b32 s36, v20, 2 +; SI-NEXT: v_readlane_b32 s35, v20, 1 +; SI-NEXT: v_readlane_b32 s34, v20, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -66409,30 +66409,30 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, 
i32 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v4, s30, 0 -; VI-NEXT: v_writelane_b32 v4, s31, 1 -; VI-NEXT: v_writelane_b32 v4, s34, 2 -; VI-NEXT: v_writelane_b32 v4, s35, 3 -; VI-NEXT: v_writelane_b32 v4, s36, 4 -; VI-NEXT: v_writelane_b32 v4, s37, 5 -; VI-NEXT: v_writelane_b32 v4, s38, 6 -; VI-NEXT: v_writelane_b32 v4, s39, 7 -; VI-NEXT: v_writelane_b32 v4, s48, 8 -; VI-NEXT: v_writelane_b32 v4, s49, 9 -; VI-NEXT: v_writelane_b32 v4, s50, 10 -; VI-NEXT: v_writelane_b32 v4, s51, 11 -; VI-NEXT: v_writelane_b32 v4, s52, 12 -; VI-NEXT: v_writelane_b32 v4, s53, 13 -; VI-NEXT: v_writelane_b32 v4, s54, 14 -; VI-NEXT: v_writelane_b32 v4, s55, 15 -; VI-NEXT: v_writelane_b32 v4, s64, 16 -; VI-NEXT: v_writelane_b32 v4, s65, 17 +; VI-NEXT: v_writelane_b32 v4, s34, 0 +; VI-NEXT: v_writelane_b32 v4, s35, 1 +; VI-NEXT: v_writelane_b32 v4, s36, 2 +; VI-NEXT: v_writelane_b32 v4, s37, 3 +; VI-NEXT: v_writelane_b32 v4, s38, 4 +; VI-NEXT: v_writelane_b32 v4, s39, 5 +; VI-NEXT: v_writelane_b32 v4, s48, 6 +; VI-NEXT: v_writelane_b32 v4, s49, 7 +; VI-NEXT: v_writelane_b32 v4, s50, 8 +; VI-NEXT: v_writelane_b32 v4, s51, 9 +; VI-NEXT: v_writelane_b32 v4, s52, 10 +; VI-NEXT: v_writelane_b32 v4, s53, 11 +; VI-NEXT: v_writelane_b32 v4, s54, 12 +; VI-NEXT: v_writelane_b32 v4, s55, 13 +; VI-NEXT: v_writelane_b32 v4, s64, 14 +; VI-NEXT: v_writelane_b32 v4, s65, 15 +; VI-NEXT: v_writelane_b32 v4, s66, 16 +; VI-NEXT: v_writelane_b32 v4, s67, 17 +; VI-NEXT: v_writelane_b32 v4, s30, 18 +; VI-NEXT: v_writelane_b32 v4, s31, 19 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v4, s66, 18 ; VI-NEXT: v_readfirstlane_b32 s4, v1 ; VI-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s5, v2 -; VI-NEXT: v_writelane_b32 v4, s67, 19 ; VI-NEXT: s_cbranch_scc0 .LBB97_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ 
-66804,27 +66804,27 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; VI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; VI-NEXT: v_add_u32_e32 v0, vcc, 60, v0 ; VI-NEXT: v_mov_b32_e32 v1, s4 +; VI-NEXT: v_readlane_b32 s30, v4, 18 ; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: v_readlane_b32 s67, v4, 19 -; VI-NEXT: v_readlane_b32 s66, v4, 18 -; VI-NEXT: v_readlane_b32 s65, v4, 17 -; VI-NEXT: v_readlane_b32 s64, v4, 16 -; VI-NEXT: v_readlane_b32 s55, v4, 15 -; VI-NEXT: v_readlane_b32 s54, v4, 14 -; VI-NEXT: v_readlane_b32 s53, v4, 13 -; VI-NEXT: v_readlane_b32 s52, v4, 12 -; VI-NEXT: v_readlane_b32 s51, v4, 11 -; VI-NEXT: v_readlane_b32 s50, v4, 10 -; VI-NEXT: v_readlane_b32 s49, v4, 9 -; VI-NEXT: v_readlane_b32 s48, v4, 8 -; VI-NEXT: v_readlane_b32 s39, v4, 7 -; VI-NEXT: v_readlane_b32 s38, v4, 6 -; VI-NEXT: v_readlane_b32 s37, v4, 5 -; VI-NEXT: v_readlane_b32 s36, v4, 4 -; VI-NEXT: v_readlane_b32 s35, v4, 3 -; VI-NEXT: v_readlane_b32 s34, v4, 2 -; VI-NEXT: v_readlane_b32 s31, v4, 1 -; VI-NEXT: v_readlane_b32 s30, v4, 0 +; VI-NEXT: v_readlane_b32 s31, v4, 19 +; VI-NEXT: v_readlane_b32 s67, v4, 17 +; VI-NEXT: v_readlane_b32 s66, v4, 16 +; VI-NEXT: v_readlane_b32 s65, v4, 15 +; VI-NEXT: v_readlane_b32 s64, v4, 14 +; VI-NEXT: v_readlane_b32 s55, v4, 13 +; VI-NEXT: v_readlane_b32 s54, v4, 12 +; VI-NEXT: v_readlane_b32 s53, v4, 11 +; VI-NEXT: v_readlane_b32 s52, v4, 10 +; VI-NEXT: v_readlane_b32 s51, v4, 9 +; VI-NEXT: v_readlane_b32 s50, v4, 8 +; VI-NEXT: v_readlane_b32 s49, v4, 7 +; VI-NEXT: v_readlane_b32 s48, v4, 6 +; VI-NEXT: v_readlane_b32 s39, v4, 5 +; VI-NEXT: v_readlane_b32 s38, v4, 4 +; VI-NEXT: v_readlane_b32 s37, v4, 3 +; VI-NEXT: v_readlane_b32 s36, v4, 2 +; VI-NEXT: v_readlane_b32 s35, v4, 1 +; VI-NEXT: v_readlane_b32 s34, v4, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -66887,26 +66887,6 @@ 
define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -66922,6 +66902,26 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; 
GFX9-NEXT: v_writelane_b32 v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB97_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -67243,22 +67243,22 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; 
GFX9-NEXT: v_readlane_b32 s50, v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -67312,18 +67312,18 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB97_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -67671,21 +67671,21 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: 
v_or_b32_e32 v3, v11, v7 ; GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -74579,7 +74579,6 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; SI-LABEL: bitcast_v32bf16_to_v32f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -74596,6 +74595,7 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded 
Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v18 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v32, 1.0, s16 ; SI-NEXT: v_mul_f32_e64 v33, 1.0, s17 @@ -74893,8 +74893,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] ; VI-NEXT: v_writelane_b32 v20, s30, 0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_writelane_b32 v20, s31, 1 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; VI-NEXT: v_readfirstlane_b32 s30, v0 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s31, v1 @@ -75212,8 +75212,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; VI-NEXT: v_mov_b32_e32 v14, s30 ; VI-NEXT: v_mov_b32_e32 v15, s31 ; VI-NEXT: .LBB103_5: ; %end -; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: v_readlane_b32 s30, v20, 0 +; VI-NEXT: v_readlane_b32 s31, v20, 1 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -75227,8 +75227,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: buffer_store_dword v20, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v20, s30, 0 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_writelane_b32 v20, s31, 1 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GFX9-NEXT: v_readfirstlane_b32 s30, v0 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s31, v1 @@ -75563,8 +75563,8 @@ define inreg <32 x half> @bitcast_v32bf16_to_v32f16_scalar(<32 x bfloat> inreg % ; GFX9-NEXT: v_mov_b32_e32 v14, s30 ; GFX9-NEXT: v_mov_b32_e32 v15, s31 ; GFX9-NEXT: .LBB103_5: ; %end -; 
GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: v_readlane_b32 s30, v20, 0 +; GFX9-NEXT: v_readlane_b32 s31, v20, 1 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -77045,6 +77045,22 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; VI-LABEL: bitcast_v32f16_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; kill: killed $vgpr17 @@ -77067,22 +77083,6 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x 
half> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v53, 16, v1 ; VI-NEXT: ; kill: killed $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: ; implicit-def: $vgpr22 ; VI-NEXT: ; implicit-def: $vgpr24 ; VI-NEXT: ; implicit-def: $vgpr55 @@ -77397,10 +77397,6 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v32f16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -77417,6 +77413,10 @@ define <64 x i8> @bitcast_v32f16_to_v64i8(<32 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr29 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -78293,8 +78293,12 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v40, s30, 0 -; SI-NEXT: v_writelane_b32 v40, s31, 1 +; SI-NEXT: v_writelane_b32 v40, s34, 0 +; SI-NEXT: v_writelane_b32 v40, s35, 1 +; SI-NEXT: v_writelane_b32 v40, s36, 2 +; SI-NEXT: v_writelane_b32 v40, s37, 3 +; SI-NEXT: v_writelane_b32 v40, s30, 4 +; SI-NEXT: v_writelane_b32 v40, s31, 5 ; SI-NEXT: v_cvt_f16_f32_e32 v21, s17 ; SI-NEXT: v_cvt_f16_f32_e32 v20, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v22, v1 @@ -78327,12 +78331,8 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: v_cvt_f16_f32_e32 v13, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v17, s29 ; SI-NEXT: v_cvt_f16_f32_e32 v16, s28 -; SI-NEXT: v_writelane_b32 v40, s34, 2 -; SI-NEXT: v_writelane_b32 v40, s35, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; SI-NEXT: v_writelane_b32 v40, s36, 4 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_writelane_b32 v40, s37, 5 ; SI-NEXT: s_cbranch_scc0 .LBB105_4 ; SI-NEXT: ; 
%bb.1: ; %cmp.false ; SI-NEXT: v_readfirstlane_b32 s4, v21 @@ -78833,13 +78833,13 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; SI-NEXT: v_or_b32_e32 v1, v2, v1 ; SI-NEXT: v_or_b32_e32 v1, s4, v1 ; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; SI-NEXT: v_readlane_b32 s30, v40, 4 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s37, v40, 5 -; SI-NEXT: v_readlane_b32 s36, v40, 4 -; SI-NEXT: v_readlane_b32 s35, v40, 3 -; SI-NEXT: v_readlane_b32 s34, v40, 2 -; SI-NEXT: v_readlane_b32 s31, v40, 1 -; SI-NEXT: v_readlane_b32 s30, v40, 0 +; SI-NEXT: v_readlane_b32 s31, v40, 5 +; SI-NEXT: v_readlane_b32 s37, v40, 3 +; SI-NEXT: v_readlane_b32 s36, v40, 2 +; SI-NEXT: v_readlane_b32 s35, v40, 1 +; SI-NEXT: v_readlane_b32 s34, v40, 0 ; SI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -78902,30 +78902,6 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -78941,6 +78917,30 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB105_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s75, s5, 24 @@ -79320,26 +79320,26 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; VI-NEXT: buffer_load_dword v3, 
off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v54 ; VI-NEXT: v_or_b32_sdwa v1, v49, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-NEXT: v_or_b32_sdwa v2, v25, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -79399,26 +79399,6 @@ define inreg <64 x i8> 
@bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 -; GFX9-NEXT: v_writelane_b32 v63, s34, 2 -; GFX9-NEXT: v_writelane_b32 v63, s35, 3 -; GFX9-NEXT: v_writelane_b32 v63, s36, 4 -; GFX9-NEXT: v_writelane_b32 v63, s37, 5 -; GFX9-NEXT: v_writelane_b32 v63, s38, 6 -; GFX9-NEXT: v_writelane_b32 v63, s39, 7 -; GFX9-NEXT: v_writelane_b32 v63, s48, 8 -; GFX9-NEXT: v_writelane_b32 v63, s49, 9 -; GFX9-NEXT: v_writelane_b32 v63, s50, 10 -; GFX9-NEXT: v_writelane_b32 v63, s51, 11 -; GFX9-NEXT: v_writelane_b32 v63, s52, 12 -; GFX9-NEXT: v_writelane_b32 v63, s53, 13 -; GFX9-NEXT: v_writelane_b32 v63, s54, 14 -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v63, s55, 15 -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 -; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec -; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -79434,6 +79414,26 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s34, 0 +; GFX9-NEXT: v_writelane_b32 v63, s35, 1 +; GFX9-NEXT: v_writelane_b32 v63, s36, 2 +; GFX9-NEXT: v_writelane_b32 v63, s37, 3 +; GFX9-NEXT: v_writelane_b32 v63, s38, 4 +; GFX9-NEXT: v_writelane_b32 v63, s39, 5 +; GFX9-NEXT: v_writelane_b32 v63, s48, 6 +; GFX9-NEXT: v_writelane_b32 
v63, s49, 7 +; GFX9-NEXT: v_writelane_b32 v63, s50, 8 +; GFX9-NEXT: v_writelane_b32 v63, s51, 9 +; GFX9-NEXT: v_writelane_b32 v63, s52, 10 +; GFX9-NEXT: v_writelane_b32 v63, s53, 11 +; GFX9-NEXT: v_writelane_b32 v63, s54, 12 +; GFX9-NEXT: v_writelane_b32 v63, s55, 13 +; GFX9-NEXT: v_writelane_b32 v63, s30, 14 +; GFX9-NEXT: v_writelane_b32 v63, s31, 15 +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec +; GFX9-NEXT: v_readfirstlane_b32 s5, v2 ; GFX9-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s56, s5, 24 @@ -79756,22 +79756,22 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX9-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s55, v63, 15 -; GFX9-NEXT: v_readlane_b32 s54, v63, 14 -; GFX9-NEXT: v_readlane_b32 s53, v63, 13 -; GFX9-NEXT: v_readlane_b32 s52, v63, 12 -; GFX9-NEXT: v_readlane_b32 s51, v63, 11 -; GFX9-NEXT: v_readlane_b32 s50, v63, 10 -; GFX9-NEXT: v_readlane_b32 s49, v63, 9 -; GFX9-NEXT: v_readlane_b32 s48, v63, 8 -; GFX9-NEXT: v_readlane_b32 s39, v63, 7 -; GFX9-NEXT: v_readlane_b32 s38, v63, 6 -; GFX9-NEXT: v_readlane_b32 s37, v63, 5 -; GFX9-NEXT: v_readlane_b32 s36, v63, 4 -; GFX9-NEXT: v_readlane_b32 s35, v63, 3 -; GFX9-NEXT: v_readlane_b32 s34, v63, 2 -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 -; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s30, v63, 14 +; GFX9-NEXT: v_readlane_b32 s31, v63, 15 +; GFX9-NEXT: v_readlane_b32 s55, v63, 13 +; GFX9-NEXT: v_readlane_b32 s54, v63, 12 +; GFX9-NEXT: v_readlane_b32 s53, v63, 11 +; GFX9-NEXT: v_readlane_b32 s52, v63, 10 +; GFX9-NEXT: v_readlane_b32 s51, v63, 9 +; GFX9-NEXT: v_readlane_b32 s50, 
v63, 8 +; GFX9-NEXT: v_readlane_b32 s49, v63, 7 +; GFX9-NEXT: v_readlane_b32 s48, v63, 6 +; GFX9-NEXT: v_readlane_b32 s39, v63, 5 +; GFX9-NEXT: v_readlane_b32 s38, v63, 4 +; GFX9-NEXT: v_readlane_b32 s37, v63, 3 +; GFX9-NEXT: v_readlane_b32 s36, v63, 2 +; GFX9-NEXT: v_readlane_b32 s35, v63, 1 +; GFX9-NEXT: v_readlane_b32 s34, v63, 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX9-NEXT: v_or_b32_sdwa v5, v33, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -79825,18 +79825,18 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX11-NEXT: s_or_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 s42, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: s_cbranch_scc0 .LBB105_3 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s43, s27, 24 @@ -80184,21 +80184,21 @@ define inreg <64 x i8> @bitcast_v32f16_to_v64i8_scalar(<32 x half> inreg %a, i32 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v10 ; GFX11-NEXT: v_or_b32_e32 v3, v11, v7 ; 
GFX11-NEXT: v_or_b32_e32 v4, v12, v8 +; GFX11-NEXT: v_readlane_b32 s30, v40, 8 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[82:85], off ; GFX11-NEXT: scratch_store_b128 v0, v[23:26], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 @@ -82605,17 +82605,17 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_writelane_b32 v32, s34, 0 +; SI-NEXT: v_writelane_b32 v32, s35, 1 +; SI-NEXT: v_writelane_b32 v32, s36, 2 +; SI-NEXT: v_writelane_b32 v32, s37, 3 +; SI-NEXT: v_writelane_b32 v32, s38, 4 +; SI-NEXT: v_writelane_b32 v32, s39, 5 +; SI-NEXT: v_writelane_b32 v32, s30, 6 +; SI-NEXT: v_writelane_b32 v32, s31, 7 ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:72 ; SI-NEXT: v_readfirstlane_b32 s46, v20 
-; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v32, s30, 0 -; SI-NEXT: v_writelane_b32 v32, s31, 1 -; SI-NEXT: v_writelane_b32 v32, s34, 2 -; SI-NEXT: v_writelane_b32 v32, s35, 3 -; SI-NEXT: v_writelane_b32 v32, s36, 4 -; SI-NEXT: v_writelane_b32 v32, s37, 5 -; SI-NEXT: v_writelane_b32 v32, s38, 6 -; SI-NEXT: v_writelane_b32 v32, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s74, v30 ; SI-NEXT: v_readfirstlane_b32 s61, v29 ; SI-NEXT: v_readfirstlane_b32 s63, v28 @@ -83031,14 +83031,14 @@ define inreg <32 x half> @bitcast_v64i8_to_v32f16_scalar(<64 x i8> inreg %a, i32 ; SI-NEXT: v_cvt_f32_f16_e32 v30, s5 ; SI-NEXT: v_cvt_f32_f16_e32 v31, s4 ; SI-NEXT: .LBB107_3: ; %end -; SI-NEXT: v_readlane_b32 s39, v32, 7 -; SI-NEXT: v_readlane_b32 s38, v32, 6 -; SI-NEXT: v_readlane_b32 s37, v32, 5 -; SI-NEXT: v_readlane_b32 s36, v32, 4 -; SI-NEXT: v_readlane_b32 s35, v32, 3 -; SI-NEXT: v_readlane_b32 s34, v32, 2 -; SI-NEXT: v_readlane_b32 s31, v32, 1 -; SI-NEXT: v_readlane_b32 s30, v32, 0 +; SI-NEXT: v_readlane_b32 s30, v32, 6 +; SI-NEXT: v_readlane_b32 s31, v32, 7 +; SI-NEXT: v_readlane_b32 s39, v32, 5 +; SI-NEXT: v_readlane_b32 s38, v32, 4 +; SI-NEXT: v_readlane_b32 s37, v32, 3 +; SI-NEXT: v_readlane_b32 s36, v32, 2 +; SI-NEXT: v_readlane_b32 s35, v32, 1 +; SI-NEXT: v_readlane_b32 s34, v32, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -85271,10 +85271,6 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; VI-LABEL: bitcast_v32bf16_to_v64i8: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; kill: killed $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -85291,6 +85287,10 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; VI-NEXT: ; implicit-def: $vgpr17 +; VI-NEXT: ; kill: killed $vgpr17 +; VI-NEXT: ; implicit-def: $vgpr17 ; VI-NEXT: ; implicit-def: $vgpr29 ; VI-NEXT: ; implicit-def: $vgpr27 ; VI-NEXT: ; implicit-def: $vgpr22 @@ -85875,12 +85875,6 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; GFX9-LABEL: bitcast_v32bf16_to_v64i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 -; GFX9-NEXT: ; kill: killed $vgpr17 -; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -85897,6 +85891,12 @@ define <64 x i8> @bitcast_v32bf16_to_v64i8(<32 x bfloat> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 +; GFX9-NEXT: ; kill: killed $vgpr17 +; GFX9-NEXT: ; implicit-def: $vgpr17 ; GFX9-NEXT: ; implicit-def: $vgpr27 ; GFX9-NEXT: ; implicit-def: 
$vgpr28 ; GFX9-NEXT: ; implicit-def: $vgpr23 @@ -87562,7 +87562,6 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; SI-LABEL: bitcast_v32bf16_to_v64i8_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -87579,6 +87578,7 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mul_f32_e64 v19, 1.0, s17 ; SI-NEXT: v_mul_f32_e32 v33, 1.0, v2 @@ -88167,30 +88167,6 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v63, s30, 0 -; VI-NEXT: v_writelane_b32 v63, s31, 1 -; VI-NEXT: v_writelane_b32 v63, s34, 2 -; VI-NEXT: v_writelane_b32 v63, s35, 3 -; VI-NEXT: v_writelane_b32 v63, s36, 4 -; VI-NEXT: v_writelane_b32 v63, s37, 5 -; VI-NEXT: v_writelane_b32 v63, s38, 6 -; VI-NEXT: v_writelane_b32 v63, s39, 7 -; VI-NEXT: v_writelane_b32 v63, s48, 8 -; VI-NEXT: v_writelane_b32 v63, s49, 9 -; VI-NEXT: v_writelane_b32 v63, s50, 10 -; VI-NEXT: v_writelane_b32 v63, s51, 11 -; VI-NEXT: v_writelane_b32 v63, s52, 12 -; VI-NEXT: v_writelane_b32 v63, s53, 13 -; VI-NEXT: v_writelane_b32 v63, s54, 14 -; VI-NEXT: v_writelane_b32 v63, s55, 15 -; VI-NEXT: v_writelane_b32 v63, 
s64, 16 -; VI-NEXT: v_writelane_b32 v63, s65, 17 -; VI-NEXT: v_writelane_b32 v63, s66, 18 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; VI-NEXT: v_writelane_b32 v63, s67, 19 -; VI-NEXT: v_readfirstlane_b32 s4, v1 -; VI-NEXT: s_and_b64 s[6:7], vcc, exec -; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill @@ -88206,6 +88182,30 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_writelane_b32 v63, s34, 0 +; VI-NEXT: v_writelane_b32 v63, s35, 1 +; VI-NEXT: v_writelane_b32 v63, s36, 2 +; VI-NEXT: v_writelane_b32 v63, s37, 3 +; VI-NEXT: v_writelane_b32 v63, s38, 4 +; VI-NEXT: v_writelane_b32 v63, s39, 5 +; VI-NEXT: v_writelane_b32 v63, s48, 6 +; VI-NEXT: v_writelane_b32 v63, s49, 7 +; VI-NEXT: v_writelane_b32 v63, s50, 8 +; VI-NEXT: v_writelane_b32 v63, s51, 9 +; VI-NEXT: v_writelane_b32 v63, s52, 10 +; VI-NEXT: v_writelane_b32 v63, s53, 11 +; VI-NEXT: v_writelane_b32 v63, s54, 12 +; VI-NEXT: v_writelane_b32 v63, s55, 13 +; VI-NEXT: v_writelane_b32 v63, s64, 14 +; VI-NEXT: v_writelane_b32 v63, s65, 15 +; VI-NEXT: v_writelane_b32 v63, s66, 16 +; VI-NEXT: v_writelane_b32 v63, s67, 17 +; VI-NEXT: v_writelane_b32 v63, s30, 18 +; VI-NEXT: v_writelane_b32 v63, s31, 19 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; VI-NEXT: v_readfirstlane_b32 s4, v1 +; VI-NEXT: s_and_b64 s[6:7], vcc, exec +; VI-NEXT: v_readfirstlane_b32 s5, v2 ; VI-NEXT: s_cbranch_scc0 .LBB109_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s56, s5, 24 @@ -88807,26 +88807,26 @@ define inreg <64 
x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v34 ; VI-NEXT: v_or_b32_sdwa v1, v13, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_readlane_b32 s67, v63, 19 -; VI-NEXT: v_readlane_b32 s66, v63, 18 -; VI-NEXT: v_readlane_b32 s65, v63, 17 -; VI-NEXT: v_readlane_b32 s64, v63, 16 -; VI-NEXT: v_readlane_b32 s55, v63, 15 -; VI-NEXT: v_readlane_b32 s54, v63, 14 -; VI-NEXT: v_readlane_b32 s53, v63, 13 -; VI-NEXT: v_readlane_b32 s52, v63, 12 -; VI-NEXT: v_readlane_b32 s51, v63, 11 -; VI-NEXT: v_readlane_b32 s50, v63, 10 -; VI-NEXT: v_readlane_b32 s49, v63, 9 -; VI-NEXT: v_readlane_b32 s48, v63, 8 -; VI-NEXT: v_readlane_b32 s39, v63, 7 -; VI-NEXT: v_readlane_b32 s38, v63, 6 -; VI-NEXT: v_readlane_b32 s37, v63, 5 -; VI-NEXT: v_readlane_b32 s36, v63, 4 -; VI-NEXT: v_readlane_b32 s35, v63, 3 -; VI-NEXT: v_readlane_b32 s34, v63, 2 -; VI-NEXT: v_readlane_b32 s31, v63, 1 -; VI-NEXT: v_readlane_b32 s30, v63, 0 +; VI-NEXT: v_readlane_b32 s30, v63, 18 +; VI-NEXT: v_readlane_b32 s31, v63, 19 +; VI-NEXT: v_readlane_b32 s67, v63, 17 +; VI-NEXT: v_readlane_b32 s66, v63, 16 +; VI-NEXT: v_readlane_b32 s65, v63, 15 +; VI-NEXT: v_readlane_b32 s64, v63, 14 +; VI-NEXT: v_readlane_b32 s55, v63, 13 +; VI-NEXT: v_readlane_b32 s54, v63, 12 +; VI-NEXT: v_readlane_b32 s53, v63, 11 +; VI-NEXT: v_readlane_b32 s52, v63, 10 +; VI-NEXT: v_readlane_b32 s51, v63, 9 +; VI-NEXT: v_readlane_b32 s50, v63, 8 +; VI-NEXT: v_readlane_b32 s49, v63, 7 +; VI-NEXT: v_readlane_b32 s48, v63, 6 +; VI-NEXT: v_readlane_b32 s39, v63, 5 +; VI-NEXT: v_readlane_b32 s38, v63, 4 +; VI-NEXT: v_readlane_b32 s37, v63, 3 +; VI-NEXT: v_readlane_b32 s36, v63, 2 +; VI-NEXT: v_readlane_b32 s35, v63, 1 +; VI-NEXT: v_readlane_b32 s34, v63, 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-NEXT: v_or_b32_sdwa v2, v33, v2 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -88884,26 +88884,26 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v4, s30, 0 -; GFX9-NEXT: v_writelane_b32 v4, s31, 1 -; GFX9-NEXT: v_writelane_b32 v4, s34, 2 -; GFX9-NEXT: v_writelane_b32 v4, s35, 3 -; GFX9-NEXT: v_writelane_b32 v4, s36, 4 -; GFX9-NEXT: v_writelane_b32 v4, s37, 5 -; GFX9-NEXT: v_writelane_b32 v4, s38, 6 -; GFX9-NEXT: v_writelane_b32 v4, s39, 7 -; GFX9-NEXT: v_writelane_b32 v4, s48, 8 -; GFX9-NEXT: v_writelane_b32 v4, s49, 9 -; GFX9-NEXT: v_writelane_b32 v4, s50, 10 -; GFX9-NEXT: v_writelane_b32 v4, s51, 11 -; GFX9-NEXT: v_writelane_b32 v4, s52, 12 -; GFX9-NEXT: v_writelane_b32 v4, s53, 13 +; GFX9-NEXT: v_writelane_b32 v4, s34, 0 +; GFX9-NEXT: v_writelane_b32 v4, s35, 1 +; GFX9-NEXT: v_writelane_b32 v4, s36, 2 +; GFX9-NEXT: v_writelane_b32 v4, s37, 3 +; GFX9-NEXT: v_writelane_b32 v4, s38, 4 +; GFX9-NEXT: v_writelane_b32 v4, s39, 5 +; GFX9-NEXT: v_writelane_b32 v4, s48, 6 +; GFX9-NEXT: v_writelane_b32 v4, s49, 7 +; GFX9-NEXT: v_writelane_b32 v4, s50, 8 +; GFX9-NEXT: v_writelane_b32 v4, s51, 9 +; GFX9-NEXT: v_writelane_b32 v4, s52, 10 +; GFX9-NEXT: v_writelane_b32 v4, s53, 11 +; GFX9-NEXT: v_writelane_b32 v4, s54, 12 +; GFX9-NEXT: v_writelane_b32 v4, s55, 13 +; GFX9-NEXT: v_writelane_b32 v4, s30, 14 +; GFX9-NEXT: v_writelane_b32 v4, s31, 15 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GFX9-NEXT: v_writelane_b32 v4, s54, 14 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s5, v2 -; GFX9-NEXT: v_writelane_b32 v4, s55, 15 ; GFX9-NEXT: s_cbranch_scc0 .LBB109_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s92, s5, 24 @@ -89541,23 +89541,23 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x 
bfloat> inreg %a, ; GFX9-NEXT: s_or_b32 s4, s4, s5 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_readlane_b32 s30, v4, 14 ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_readlane_b32 s55, v4, 15 -; GFX9-NEXT: v_readlane_b32 s54, v4, 14 -; GFX9-NEXT: v_readlane_b32 s53, v4, 13 -; GFX9-NEXT: v_readlane_b32 s52, v4, 12 -; GFX9-NEXT: v_readlane_b32 s51, v4, 11 -; GFX9-NEXT: v_readlane_b32 s50, v4, 10 -; GFX9-NEXT: v_readlane_b32 s49, v4, 9 -; GFX9-NEXT: v_readlane_b32 s48, v4, 8 -; GFX9-NEXT: v_readlane_b32 s39, v4, 7 -; GFX9-NEXT: v_readlane_b32 s38, v4, 6 -; GFX9-NEXT: v_readlane_b32 s37, v4, 5 -; GFX9-NEXT: v_readlane_b32 s36, v4, 4 -; GFX9-NEXT: v_readlane_b32 s35, v4, 3 -; GFX9-NEXT: v_readlane_b32 s34, v4, 2 -; GFX9-NEXT: v_readlane_b32 s31, v4, 1 -; GFX9-NEXT: v_readlane_b32 s30, v4, 0 +; GFX9-NEXT: v_readlane_b32 s31, v4, 15 +; GFX9-NEXT: v_readlane_b32 s55, v4, 13 +; GFX9-NEXT: v_readlane_b32 s54, v4, 12 +; GFX9-NEXT: v_readlane_b32 s53, v4, 11 +; GFX9-NEXT: v_readlane_b32 s52, v4, 10 +; GFX9-NEXT: v_readlane_b32 s51, v4, 9 +; GFX9-NEXT: v_readlane_b32 s50, v4, 8 +; GFX9-NEXT: v_readlane_b32 s49, v4, 7 +; GFX9-NEXT: v_readlane_b32 s48, v4, 6 +; GFX9-NEXT: v_readlane_b32 s39, v4, 5 +; GFX9-NEXT: v_readlane_b32 s38, v4, 4 +; GFX9-NEXT: v_readlane_b32 s37, v4, 3 +; GFX9-NEXT: v_readlane_b32 s36, v4, 2 +; GFX9-NEXT: v_readlane_b32 s35, v4, 1 +; GFX9-NEXT: v_readlane_b32 s34, v4, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -89620,19 +89620,19 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX11-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX11-NEXT: scratch_store_b32 off, v17, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 -; GFX11-NEXT: v_writelane_b32 v17, s30, 0 +; GFX11-NEXT: v_writelane_b32 v17, s34, 0 
+; GFX11-NEXT: v_writelane_b32 v17, s35, 1 +; GFX11-NEXT: v_writelane_b32 v17, s36, 2 +; GFX11-NEXT: v_writelane_b32 v17, s37, 3 +; GFX11-NEXT: v_writelane_b32 v17, s38, 4 +; GFX11-NEXT: v_writelane_b32 v17, s39, 5 +; GFX11-NEXT: v_writelane_b32 v17, s48, 6 +; GFX11-NEXT: v_writelane_b32 v17, s50, 7 +; GFX11-NEXT: v_writelane_b32 v17, s51, 8 +; GFX11-NEXT: v_writelane_b32 v17, s30, 9 +; GFX11-NEXT: v_writelane_b32 v17, s31, 10 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0 ; GFX11-NEXT: s_mov_b32 vcc_lo, 0 -; GFX11-NEXT: v_writelane_b32 v17, s31, 1 -; GFX11-NEXT: v_writelane_b32 v17, s34, 2 -; GFX11-NEXT: v_writelane_b32 v17, s35, 3 -; GFX11-NEXT: v_writelane_b32 v17, s36, 4 -; GFX11-NEXT: v_writelane_b32 v17, s37, 5 -; GFX11-NEXT: v_writelane_b32 v17, s38, 6 -; GFX11-NEXT: v_writelane_b32 v17, s39, 7 -; GFX11-NEXT: v_writelane_b32 v17, s48, 8 -; GFX11-NEXT: v_writelane_b32 v17, s50, 9 -; GFX11-NEXT: v_writelane_b32 v17, s51, 10 ; GFX11-NEXT: s_cbranch_scc0 .LBB109_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false ; GFX11-NEXT: s_lshr_b32 s62, s27, 24 @@ -90280,22 +90280,22 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a, ; GFX11-NEXT: s_or_b32 s3, s4, s5 ; GFX11-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_mov_b32 v14, s1 ; GFX11-NEXT: v_dual_mov_b32 v15, s2 :: v_dual_mov_b32 v16, s3 +; GFX11-NEXT: v_readlane_b32 s30, v17, 9 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 -; GFX11-NEXT: v_readlane_b32 s51, v17, 10 -; GFX11-NEXT: v_readlane_b32 s50, v17, 9 -; GFX11-NEXT: v_readlane_b32 s48, v17, 8 -; GFX11-NEXT: v_readlane_b32 s39, v17, 7 -; GFX11-NEXT: v_readlane_b32 s38, v17, 6 -; GFX11-NEXT: v_readlane_b32 s37, v17, 5 -; GFX11-NEXT: v_readlane_b32 s36, v17, 4 -; GFX11-NEXT: v_readlane_b32 s35, v17, 3 -; GFX11-NEXT: v_readlane_b32 s34, v17, 2 -; 
GFX11-NEXT: v_readlane_b32 s31, v17, 1 -; GFX11-NEXT: v_readlane_b32 s30, v17, 0 +; GFX11-NEXT: v_readlane_b32 s31, v17, 10 +; GFX11-NEXT: v_readlane_b32 s51, v17, 8 +; GFX11-NEXT: v_readlane_b32 s50, v17, 7 +; GFX11-NEXT: v_readlane_b32 s48, v17, 6 +; GFX11-NEXT: v_readlane_b32 s39, v17, 5 +; GFX11-NEXT: v_readlane_b32 s38, v17, 4 +; GFX11-NEXT: v_readlane_b32 s37, v17, 3 +; GFX11-NEXT: v_readlane_b32 s36, v17, 2 +; GFX11-NEXT: v_readlane_b32 s35, v17, 1 +; GFX11-NEXT: v_readlane_b32 s34, v17, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v17, off, s32 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll index 5d4df4bde1af8..07c574944ad4e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll @@ -656,36 +656,36 @@ define inreg <18 x i32> @bitcast_v18f32_to_v18i32_scalar(<18 x float> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 
s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB3_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -2075,36 +2075,36 @@ define inreg <18 x i32> @bitcast_v9f64_to_v18i32_scalar(<9 x double> inreg %a, i ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; 
GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB11_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -3806,7 +3806,6 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v18i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -3823,6 +3822,7 @@ define <18 x i32> @bitcast_v36i16_to_v18i32(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: 
v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4121,13 +4121,13 @@ define inreg <18 x i32> @bitcast_v36i16_to_v18i32_scalar(<36 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v36i16_to_v18i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -4710,7 +4710,6 @@ define <36 x half> @bitcast_v18i32_to_v36f16(<18 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v18i32_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -4718,6 +4717,7 @@ define <36 x half> @bitcast_v18i32_to_v36f16(<18 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -6669,7 +6669,6 @@ define <18 x i32> 
@bitcast_v36f16_to_v18i32(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v18i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6686,6 +6685,7 @@ define <18 x i32> @bitcast_v36f16_to_v18i32(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -7970,36 +7970,36 @@ define inreg <9 x i64> @bitcast_v18f32_to_v9i64_scalar(<18 x float> inreg %a, i3 ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 
s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB21_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -8731,36 +8731,36 @@ define inreg <9 x double> @bitcast_v18f32_to_v9f64_scalar(<18 x float> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; 
GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB25_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -9077,36 +9077,36 @@ define inreg <18 x float> @bitcast_v9f64_to_v18f32_scalar(<9 x double> inreg %a, ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; 
GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB27_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -10939,7 +10939,6 @@ define <18 x float> @bitcast_v36i16_to_v18f32(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v18f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -10956,6 +10955,7 @@ define <18 x float> @bitcast_v36i16_to_v18f32(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -11254,13 +11254,13 @@ define inreg <18 x float> @bitcast_v36i16_to_v18f32_scalar(<36 x i16> inreg %a, ; SI-LABEL: bitcast_v36i16_to_v18f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -11843,7 +11843,6 @@ define <36 x half> @bitcast_v18f32_to_v36f16(<18 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v18f32_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -11851,6 +11850,7 @@ define <36 x half> @bitcast_v18f32_to_v36f16(<18 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; 
SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -13940,7 +13940,6 @@ define <18 x float> @bitcast_v36f16_to_v18f32(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v18f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -13957,6 +13956,7 @@ define <18 x float> @bitcast_v36f16_to_v18f32(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -15547,36 +15547,36 @@ define inreg <9 x i64> @bitcast_v9f64_to_v9i64_scalar(<9 x double> inreg %a, i32 ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: v_writelane_b32 v32, s36, 0 +; GFX11-NEXT: v_writelane_b32 v32, s37, 1 +; GFX11-NEXT: v_writelane_b32 v32, s38, 2 +; GFX11-NEXT: v_writelane_b32 v32, s39, 3 +; GFX11-NEXT: v_writelane_b32 v32, s48, 4 +; GFX11-NEXT: v_writelane_b32 v32, s49, 5 +; GFX11-NEXT: v_writelane_b32 v32, s50, 6 +; GFX11-NEXT: v_writelane_b32 v32, s51, 7 +; GFX11-NEXT: v_writelane_b32 v32, s52, 8 +; GFX11-NEXT: v_writelane_b32 v32, s53, 9 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-NEXT: s_mov_b32 s53, s29 +; GFX11-NEXT: s_mov_b32 s52, s28 +; GFX11-NEXT: s_mov_b32 s51, s27 +; GFX11-NEXT: s_mov_b32 s50, s26 +; GFX11-NEXT: s_mov_b32 s49, s25 +; GFX11-NEXT: s_mov_b32 s48, s24 ; GFX11-NEXT: 
s_mov_b32 s47, s23 ; GFX11-NEXT: s_mov_b32 s46, s22 ; GFX11-NEXT: s_mov_b32 s45, s21 -; GFX11-NEXT: v_writelane_b32 v32, s37, 1 ; GFX11-NEXT: s_mov_b32 s44, s20 ; GFX11-NEXT: s_mov_b32 s43, s19 ; GFX11-NEXT: s_mov_b32 s42, s18 ; GFX11-NEXT: s_mov_b32 s41, s17 -; GFX11-NEXT: v_writelane_b32 v32, s38, 2 ; GFX11-NEXT: s_mov_b32 s40, s16 +; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s38, s2 ; GFX11-NEXT: s_mov_b32 s37, s1 ; GFX11-NEXT: s_mov_b32 s36, s0 -; GFX11-NEXT: v_writelane_b32 v32, s39, 3 -; GFX11-NEXT: s_mov_b32 s39, s3 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: s_and_b32 s1, vcc_lo, exec_lo -; GFX11-NEXT: v_writelane_b32 v32, s48, 4 -; GFX11-NEXT: s_mov_b32 s48, s24 -; GFX11-NEXT: v_writelane_b32 v32, s49, 5 -; GFX11-NEXT: s_mov_b32 s49, s25 -; GFX11-NEXT: v_writelane_b32 v32, s50, 6 -; GFX11-NEXT: s_mov_b32 s50, s26 -; GFX11-NEXT: v_writelane_b32 v32, s51, 7 -; GFX11-NEXT: s_mov_b32 s51, s27 -; GFX11-NEXT: v_writelane_b32 v32, s52, 8 -; GFX11-NEXT: s_mov_b32 s52, s28 -; GFX11-NEXT: v_writelane_b32 v32, s53, 9 -; GFX11-NEXT: s_mov_b32 s53, s29 ; GFX11-NEXT: s_cbranch_scc0 .LBB39_3 ; GFX11-NEXT: ; %bb.1: ; %Flow ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 @@ -17288,7 +17288,6 @@ define <9 x i64> @bitcast_v36i16_to_v9i64(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v9i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17305,6 +17304,7 @@ define <9 x i64> @bitcast_v36i16_to_v9i64(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, 
s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -17603,13 +17603,13 @@ define inreg <9 x i64> @bitcast_v36i16_to_v9i64_scalar(<36 x i16> inreg %a, i32 ; SI-LABEL: bitcast_v36i16_to_v9i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -18192,7 +18192,6 @@ define <36 x half> @bitcast_v9i64_to_v36f16(<9 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v9i64_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -18200,6 +18199,7 @@ define <36 x half> @bitcast_v9i64_to_v36f16(<9 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: 
$vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -20161,7 +20161,6 @@ define <9 x i64> @bitcast_v36f16_to_v9i64(<36 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36f16_to_v9i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -20178,6 +20177,7 @@ define <9 x i64> @bitcast_v36f16_to_v9i64(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -22864,7 +22864,6 @@ define <9 x double> @bitcast_v36i16_to_v9f64(<36 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v36i16_to_v9f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22881,6 +22880,7 @@ define <9 x double> @bitcast_v36i16_to_v9f64(<36 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v0, 16, v32 @@ -23179,13 +23179,13 @@ define inreg <9 x double> @bitcast_v36i16_to_v9f64_scalar(<36 x i16> inreg %a, i ; SI-LABEL: bitcast_v36i16_to_v9f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; SI-NEXT: v_mov_b32_e32 v32, v20 ; SI-NEXT: v_mov_b32_e32 v33, v18 ; SI-NEXT: v_mov_b32_e32 v34, v16 @@ -23768,7 +23768,6 @@ define <36 x half> @bitcast_v9f64_to_v36f16(<9 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v9f64_to_v36f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -23776,6 +23775,7 @@ define <36 x half> @bitcast_v9f64_to_v36f16(<9 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 ; SI-NEXT: ; implicit-def: $vgpr45 ; SI-NEXT: ; implicit-def: $vgpr46 ; SI-NEXT: ; implicit-def: $vgpr43 @@ -25787,7 +25787,6 @@ define <9 x double> @bitcast_v36f16_to_v9f64(<36 x half> %a, i32 %b) { ; 
GFX9-LABEL: bitcast_v36f16_to_v9f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25804,6 +25803,7 @@ define <9 x double> @bitcast_v36f16_to_v9f64(<36 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v17 ; GFX9-NEXT: v_mov_b32_e32 v33, v16 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -27586,8 +27586,6 @@ define inreg <36 x half> @bitcast_v36i16_to_v36f16_scalar(<36 x i16> inreg %a, i ; SI-LABEL: bitcast_v36i16_to_v36f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill @@ -27599,6 +27597,8 @@ define inreg <36 x half> @bitcast_v36i16_to_v36f16_scalar(<36 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB57_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_waitcnt expcnt(4) diff --git 
a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll index 44cfd6c28ca6a..1648368af460a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll @@ -4019,7 +4019,6 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v20i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -4036,6 +4035,7 @@ define <20 x i32> @bitcast_v40i16_to_v20i32(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4376,7 +4376,6 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v40i16_to_v20i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -4387,6 +4386,7 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 
4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -4913,85 +4913,157 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 
offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 
offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, 
s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -5289,7 +5361,6 @@ define <40 x half> @bitcast_v20i32_to_v40f16(<20 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v20i32_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -5303,6 +5374,7 @@ define <40 x half> @bitcast_v20i32_to_v40f16(<20 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -7495,7 +7567,6 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v20i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 
4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -7512,6 +7583,7 @@ define <20 x i32> @bitcast_v40f16_to_v20i32(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -8519,85 +8591,157 @@ define inreg <20 x i32> @bitcast_v40f16_to_v20i32_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -12218,7 +12362,6 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v20f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -12235,6 +12378,7 @@ define <20 x float> @bitcast_v40i16_to_v20f32(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 
16, v32 @@ -12575,7 +12719,6 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; SI-LABEL: bitcast_v40i16_to_v20f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -12586,6 +12729,7 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -13112,85 +13256,157 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -13488,7 +13704,6 @@ define <40 x half> @bitcast_v20f32_to_v40f16(<20 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v20f32_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -13502,6 +13717,7 @@ define <40 x half> @bitcast_v20f32_to_v40f16(<20 x float> %a, i32 
%b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -14276,6 +14492,9 @@ define inreg <40 x half> @bitcast_v20f32_to_v40f16_scalar(<20 x float> inreg %a, ; SI-LABEL: bitcast_v20f32_to_v40f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; SI-NEXT: v_readfirstlane_b32 s11, v1 ; SI-NEXT: v_readfirstlane_b32 s10, v2 @@ -14284,9 +14503,6 @@ define inreg <40 x half> @bitcast_v20f32_to_v40f16_scalar(<20 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v5 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v6 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -15808,7 +16024,6 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v20f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], 
s32 offset:52 ; 4-byte Folded Spill @@ -15825,6 +16040,7 @@ define <20 x float> @bitcast_v40f16_to_v20f32(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -16832,85 +17048,157 @@ define inreg <20 x float> @bitcast_v40f16_to_v20f32_scalar(<40 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 
offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -19727,7 +20015,6 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v10i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -19744,6 +20031,7 @@ define <10 x i64> @bitcast_v40i16_to_v10i64(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -20084,7 +20372,6 @@ define inreg <10 x i64> 
@bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v40i16_to_v10i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -20095,6 +20382,7 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -20621,85 +20909,157 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 
; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -20997,7 +21357,6 @@ define <40 x half> @bitcast_v10i64_to_v40f16(<10 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v10i64_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -21011,6 +21370,7 @@ define <40 x half> @bitcast_v10i64_to_v40f16(<10 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 
4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -23213,7 +23573,6 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v10i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -23230,6 +23589,7 @@ define <10 x i64> @bitcast_v40f16_to_v10i64(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -24237,85 +24597,157 @@ define inreg <10 x i64> @bitcast_v40f16_to_v10i64_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -26466,7 +26898,6 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40i16_to_v10f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -26483,6 +26914,7 @@ define <10 x double> @bitcast_v40i16_to_v10f64(<40 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -26823,7 +27255,6 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; SI-LABEL: bitcast_v40i16_to_v10f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill @@ -26834,6 +27265,7 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; SI-NEXT: v_mov_b32_e32 v32, v24 ; SI-NEXT: v_mov_b32_e32 v33, v22 ; SI-NEXT: v_mov_b32_e32 v34, v20 @@ -27360,85 +27792,157 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -27736,7 +28240,6 @@ define <40 x half> @bitcast_v10f64_to_v40f16(<10 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v10f64_to_v40f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -27750,6 +28253,7 @@ define <40 x half> @bitcast_v10f64_to_v40f16(<10 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 ; SI-NEXT: ; implicit-def: $vgpr59 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr57 @@ -28484,6 +28988,10 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a ; SI-LABEL: bitcast_v10f64_to_v40f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 ; SI-NEXT: v_readfirstlane_b32 s8, v1 ; SI-NEXT: v_readfirstlane_b32 s9, v2 @@ -28492,10 +29000,6 @@ define inreg <40 x half> @bitcast_v10f64_to_v40f16_scalar(<10 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v5 ; SI-NEXT: s_and_b64 s[10:11], vcc, exec ; SI-NEXT: 
v_readfirstlane_b32 s5, v6 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s10, s5, 16 @@ -29989,7 +30493,6 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v40f16_to_v10f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -30006,6 +30509,7 @@ define <10 x double> @bitcast_v40f16_to_v10f64(<40 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v19 ; GFX9-NEXT: v_mov_b32_e32 v33, v18 ; GFX9-NEXT: v_mov_b32_e32 v43, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -31013,85 +31517,157 @@ define inreg <10 x double> @bitcast_v40f16_to_v10f64_scalar(<40 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 ; GFX11-TRUE16-NEXT: s_clause 0xa ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 @@ -32303,8 +32879,6 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; SI-LABEL: bitcast_v40i16_to_v40f16_scalar: ; 
SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -32321,6 +32895,8 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB57_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_cvt_f32_f16_e32 v30, v15 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll index 87d5157b3c340..010c7f18fa513 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll @@ -4340,7 +4340,6 @@ define <22 x i32> @bitcast_v44i16_to_v22i32(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v22i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -4357,6 +4356,7 @@ define <22 x i32> @bitcast_v44i16_to_v22i32(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, 
s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -4739,7 +4739,6 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v44i16_to_v22i32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -4754,6 +4753,7 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -5328,87 +5328,161 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 
offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -5722,10 +5796,6 @@ define <44 x half> @bitcast_v22i32_to_v44f16(<22 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v22i32_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -5742,6 +5812,10 @@ define <44 x half> @bitcast_v22i32_to_v44f16(<22 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -8182,7 +8256,6 @@ define <22 x i32> @bitcast_v44f16_to_v22i32(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v22i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -8199,6 +8272,7 @@ define <22 x i32> @bitcast_v44f16_to_v22i32(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -9310,87 
+9384,161 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -13303,7 +13451,6 @@ define <22 x float> @bitcast_v44i16_to_v22f32(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v22f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -13320,6 +13467,7 @@ define <22 x float> @bitcast_v44i16_to_v22f32(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -13702,7 +13850,6 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; SI-LABEL: bitcast_v44i16_to_v22f32_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -13717,6 +13864,7 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -14291,87 +14439,161 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 
offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 
offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -14685,10 +14907,6 @@ define <44 x half> @bitcast_v22f32_to_v44f16(<22 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v22f32_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -14705,6 +14923,10 @@ define <44 x half> @bitcast_v22f32_to_v44f16(<22 x float> %a, i32 %b) { ; SI-NEXT: 
buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -15560,6 +15782,14 @@ define inreg <44 x half> @bitcast_v22f32_to_v44f16_scalar(<22 x float> inreg %a, ; SI-LABEL: bitcast_v22f32_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; SI-NEXT: v_readfirstlane_b32 s13, v1 ; SI-NEXT: v_readfirstlane_b32 s12, v2 @@ -15570,14 +15800,6 @@ define inreg <44 x half> @bitcast_v22f32_to_v44f16_scalar(<22 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v7 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v8 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, 
off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -17278,7 +17500,6 @@ define <22 x float> @bitcast_v44f16_to_v22f32(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v22f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17295,6 +17516,7 @@ define <22 x float> @bitcast_v44f16_to_v22f32(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -18406,87 +18628,161 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -21552,7 +21848,6 @@ define <11 x i64> @bitcast_v44i16_to_v11i64(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v11i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -21569,6 +21864,7 @@ define <11 x i64> @bitcast_v44i16_to_v11i64(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -21951,7 +22247,6 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; SI-LABEL: bitcast_v44i16_to_v11i64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -21966,6 +22261,7 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; 
SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -22540,87 +22836,161 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -22934,10 +23304,6 @@ define <44 x half> @bitcast_v11i64_to_v44f16(<11 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v11i64_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -22954,6 +23320,10 @@ define <44 x half> @bitcast_v11i64_to_v44f16(<11 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -25406,7 +25776,6 @@ define <11 x i64> 
@bitcast_v44f16_to_v11i64(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v11i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25423,6 +25792,7 @@ define <11 x i64> @bitcast_v44f16_to_v11i64(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -26534,87 +26904,161 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 
offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -28968,7 +29412,6 @@ define <11 x double> @bitcast_v44i16_to_v11f64(<44 x i16> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44i16_to_v11f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte 
Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -28985,6 +29428,7 @@ define <11 x double> @bitcast_v44i16_to_v11f64(<44 x i16> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -29367,7 +29811,6 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; SI-LABEL: bitcast_v44i16_to_v11f64_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -29382,6 +29825,7 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; SI-NEXT: v_mov_b32_e32 v32, v28 ; SI-NEXT: v_mov_b32_e32 v33, v26 ; SI-NEXT: v_mov_b32_e32 v34, v24 @@ -29956,87 +30400,161 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 
offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; 
GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -30350,10 +30868,6 @@ define <44 x half> @bitcast_v11f64_to_v44f16(<11 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v11f64_to_v44f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 -; SI-NEXT: ; implicit-def: $vgpr26 -; SI-NEXT: ; implicit-def: $vgpr27 -; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -30370,6 +30884,10 @@ define <44 x half> @bitcast_v11f64_to_v44f16(<11 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v23 +; SI-NEXT: ; implicit-def: $vgpr26 +; SI-NEXT: ; implicit-def: $vgpr27 +; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr24 ; SI-NEXT: ; implicit-def: $vgpr25 ; SI-NEXT: ; implicit-def: $vgpr63 @@ -31181,6 +31699,15 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a ; SI-LABEL: bitcast_v11f64_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; 
SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 ; SI-NEXT: v_readfirstlane_b32 s10, v1 ; SI-NEXT: v_readfirstlane_b32 s11, v2 @@ -31191,15 +31718,6 @@ define inreg <44 x half> @bitcast_v11f64_to_v44f16_scalar(<11 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v7 ; SI-NEXT: s_and_b64 s[12:13], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v8 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s12, s5, 16 @@ -32867,7 +33385,6 @@ define <11 x double> @bitcast_v44f16_to_v11f64(<44 x half> %a, i32 %b) { ; GFX9-LABEL: bitcast_v44f16_to_v11f64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 
; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -32884,6 +33401,7 @@ define <11 x double> @bitcast_v44f16_to_v11f64(<44 x half> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_mov_b32_e32 v32, v21 ; GFX9-NEXT: v_mov_b32_e32 v33, v20 ; GFX9-NEXT: v_mov_b32_e32 v45, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v32 @@ -33995,87 +34513,161 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 ; GFX11-TRUE16-NEXT: s_clause 0xc ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 @@ -35429,7 +36021,6 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; SI-LABEL: bitcast_v44i16_to_v44f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill @@ -35446,7 +36037,8 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, 
s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB57_4 @@ -37436,7 +38028,6 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-LABEL: bitcast_v44f16_to_v44i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill @@ -37453,6 +38044,7 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: s_waitcnt expcnt(6) ; SI-NEXT: v_cvt_f16_f32_e32 v57, v2 ; SI-NEXT: s_waitcnt expcnt(5) @@ -37500,7 +38092,7 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; SI-NEXT: v_cvt_f16_f32_e32 v38, s25 ; SI-NEXT: v_cvt_f16_f32_e32 v16, s26 ; SI-NEXT: v_cvt_f16_f32_e32 v29, s29 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: s_cbranch_scc0 .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll index fb2e94fc3b87a..3fbedf74d9e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll +++ 
b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll @@ -2440,8 +2440,8 @@ define <48 x i16> @bitcast_v24i32_to_v48i16(<24 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v24i32_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr51 @@ -3193,10 +3193,11 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v12, s30, 0 -; SI-NEXT: v_writelane_b32 v12, s31, 1 +; SI-NEXT: v_writelane_b32 v12, s34, 0 +; SI-NEXT: v_writelane_b32 v12, s35, 1 +; SI-NEXT: v_writelane_b32 v12, s30, 2 +; SI-NEXT: v_writelane_b32 v12, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_writelane_b32 v12, s34, 2 ; SI-NEXT: v_readfirstlane_b32 s12, v1 ; SI-NEXT: v_readfirstlane_b32 s13, v2 ; SI-NEXT: v_readfirstlane_b32 s10, v3 @@ -3208,7 +3209,6 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v9 ; SI-NEXT: s_and_b64 s[14:15], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v10 -; SI-NEXT: v_writelane_b32 v12, s35, 3 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s88, s5, 16 @@ -3449,11 +3449,11 @@ define inreg <48 x i16> @bitcast_v24i32_to_v48i16_scalar(<24 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x5c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v12, 2 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s35, v12, 3 -; SI-NEXT: v_readlane_b32 s34, v12, 2 -; SI-NEXT: v_readlane_b32 s31, v12, 1 -; 
SI-NEXT: v_readlane_b32 s30, v12, 0 +; SI-NEXT: v_readlane_b32 s31, v12, 3 +; SI-NEXT: v_readlane_b32 s35, v12, 1 +; SI-NEXT: v_readlane_b32 s34, v12, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -5655,6 +5655,10 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v48i16_to_v24i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -5680,10 +5684,6 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -5805,89 +5805,165 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -6216,16 +6292,7 @@ end: define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v24i32_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6246,6 +6313,11 @@ define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -6290,6 +6362,10 @@ define <48 x half> @bitcast_v24i32_to_v48f16(<24 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; 
SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -7211,6 +7287,7 @@ define inreg <48 x half> @bitcast_v24i32_to_v48f16_scalar(<24 x i32> inreg %a, i ; SI-LABEL: bitcast_v24i32_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_readfirstlane_b32 s15, v1 ; SI-NEXT: v_readfirstlane_b32 s14, v2 @@ -7223,7 +7300,6 @@ define inreg <48 x half> @bitcast_v24i32_to_v48f16_scalar(<24 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v9 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v10 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -10061,6 +10137,10 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX9-LABEL: bitcast_v48f16_to_v24i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -10086,10 +10166,6 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -10213,89 +10289,165 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -12185,8 +12337,8 @@ define <48 x i16> @bitcast_v24f32_to_v48i16(<24 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v24f32_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr51 @@ -12910,6 +13062,9 @@ define inreg <48 x i16> @bitcast_v24f32_to_v48i16_scalar(<24 x float> inreg %a, ; SI-LABEL: bitcast_v24f32_to_v48i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_mov_b32_e32 v23, s16 ; SI-NEXT: v_mov_b32_e32 v24, s17 @@ -12926,9 +13081,6 @@ define inreg <48 x i16> @bitcast_v24f32_to_v48i16_scalar(<24 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v11, s28 ; SI-NEXT: v_mov_b32_e32 v12, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[25:26], v[9:10], 16 @@ -15492,6 +15644,10 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX9-LABEL: bitcast_v48i16_to_v24f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -15517,10 +15673,6 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -15642,89 +15794,165 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 
offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 
offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -16053,16 +16281,7 @@ end: define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v24f32_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -16083,6 +16302,11 @@ define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-NEXT: ; 
implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -16127,6 +16351,10 @@ define <48 x half> @bitcast_v24f32_to_v48f16(<24 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -17024,18 +17252,6 @@ define inreg <48 x half> @bitcast_v24f32_to_v48f16_scalar(<24 x float> inreg %a, ; SI-LABEL: bitcast_v24f32_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_readfirstlane_b32 s15, v1 -; SI-NEXT: v_readfirstlane_b32 s14, v2 -; SI-NEXT: v_readfirstlane_b32 s13, v3 -; SI-NEXT: v_readfirstlane_b32 s12, v4 -; SI-NEXT: v_readfirstlane_b32 s11, v5 -; SI-NEXT: v_readfirstlane_b32 s10, v6 -; SI-NEXT: v_readfirstlane_b32 s8, v7 -; SI-NEXT: v_readfirstlane_b32 s7, v8 -; SI-NEXT: v_readfirstlane_b32 s6, v9 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v10 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -17049,6 +17265,18 @@ define inreg <48 x half> @bitcast_v24f32_to_v48f16_scalar(<24 x float> inreg %a, ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; SI-NEXT: v_readfirstlane_b32 s15, v1 +; SI-NEXT: v_readfirstlane_b32 s14, v2 +; SI-NEXT: v_readfirstlane_b32 s13, v3 +; SI-NEXT: v_readfirstlane_b32 s12, v4 +; SI-NEXT: v_readfirstlane_b32 s11, v5 +; SI-NEXT: v_readfirstlane_b32 s10, v6 +; SI-NEXT: v_readfirstlane_b32 s8, v7 +; SI-NEXT: v_readfirstlane_b32 s7, v8 +; SI-NEXT: v_readfirstlane_b32 s6, v9 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v10 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -20039,6 +20267,10 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX9-LABEL: bitcast_v48f16_to_v24f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -20064,10 +20296,6 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -20191,89 +20419,165 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -21365,8 +21669,8 @@ define <48 x i16> @bitcast_v12i64_to_v48i16(<12 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v12i64_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr50 @@ -22130,10 +22434,11 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v12, s30, 0 -; SI-NEXT: v_writelane_b32 v12, s31, 1 +; SI-NEXT: v_writelane_b32 v12, s34, 0 +; SI-NEXT: v_writelane_b32 v12, s35, 1 +; SI-NEXT: v_writelane_b32 v12, s30, 2 +; SI-NEXT: v_writelane_b32 v12, s31, 3 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_writelane_b32 v12, s34, 2 ; SI-NEXT: 
v_readfirstlane_b32 s12, v1 ; SI-NEXT: v_readfirstlane_b32 s13, v2 ; SI-NEXT: v_readfirstlane_b32 s10, v3 @@ -22145,7 +22450,6 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v9 ; SI-NEXT: s_and_b64 s[14:15], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v10 -; SI-NEXT: v_writelane_b32 v12, s35, 3 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s88, s5, 16 @@ -22386,11 +22690,11 @@ define inreg <48 x i16> @bitcast_v12i64_to_v48i16_scalar(<12 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x5c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v12, 2 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s35, v12, 3 -; SI-NEXT: v_readlane_b32 s34, v12, 2 -; SI-NEXT: v_readlane_b32 s31, v12, 1 -; SI-NEXT: v_readlane_b32 s30, v12, 0 +; SI-NEXT: v_readlane_b32 s31, v12, 3 +; SI-NEXT: v_readlane_b32 s35, v12, 1 +; SI-NEXT: v_readlane_b32 s34, v12, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -24592,6 +24896,10 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v48i16_to_v12i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -24617,10 +24925,6 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; 
GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -24742,89 +25046,165 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -25153,16 +25533,7 @@ end: define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v12i64_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, 
off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -25183,6 +25554,11 @@ define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -25227,6 +25603,10 @@ define <48 x half> @bitcast_v12i64_to_v48f16(<12 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -26160,6 +26540,7 @@ define inreg <48 x half> @bitcast_v12i64_to_v48f16_scalar(<12 x i64> inreg %a, i ; SI-LABEL: bitcast_v12i64_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 @@ -26172,7 +26553,6 @@ define inreg <48 x half> @bitcast_v12i64_to_v48f16_scalar(<12 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v9 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v10 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -29010,6 +29390,10 @@ 
define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX9-LABEL: bitcast_v48f16_to_v12i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -29035,10 +29419,6 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -29162,89 +29542,165 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -29574,8 +30030,8 @@ define <48 x i16> @bitcast_v12f64_to_v48i16(<12 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v12f64_to_v48i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 ; SI-NEXT: ; implicit-def: $vgpr53 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr50 @@ -30263,6 +30719,9 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a, ; SI-LABEL: bitcast_v12f64_to_v48i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; SI-NEXT: v_mov_b32_e32 v23, s16 ; SI-NEXT: v_mov_b32_e32 v24, s17 @@ -30279,9 +30738,6 @@ define inreg <48 x i16> @bitcast_v12f64_to_v48i16_scalar(<12 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v11, s28 ; SI-NEXT: v_mov_b32_e32 v12, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[25:26], v[9:10], 16 @@ -32809,6 +33265,10 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX9-LABEL: bitcast_v48i16_to_v12f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -32834,10 +33294,6 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -32959,89 +33415,165 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 
offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 @@ -33370,16 +33902,7 @@ end: define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v12f64_to_v48f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 -; SI-NEXT: ; implicit-def: $vgpr35 -; SI-NEXT: ; implicit-def: $vgpr34 -; SI-NEXT: ; kill: killed $vgpr35 -; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -33400,6 +33923,11 @@ define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v25 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr33 ; SI-NEXT: ; implicit-def: $vgpr30 @@ -33444,6 +33972,10 @@ define <48 x half> @bitcast_v12f64_to_v48f16(<12 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr34 ; SI-NEXT: ; kill: killed $vgpr35 +; SI-NEXT: ; kill: killed $vgpr34 +; SI-NEXT: ; implicit-def: $vgpr35 +; SI-NEXT: ; implicit-def: $vgpr34 +; SI-NEXT: ; kill: killed $vgpr35 ; SI-NEXT: ; implicit-def: $vgpr35 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -34293,18 +34825,6 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a ; SI-LABEL: bitcast_v12f64_to_v48f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 -; SI-NEXT: v_readfirstlane_b32 s12, v1 -; SI-NEXT: v_readfirstlane_b32 s13, v2 -; SI-NEXT: v_readfirstlane_b32 s10, v3 -; SI-NEXT: v_readfirstlane_b32 s11, v4 -; SI-NEXT: v_readfirstlane_b32 s8, v5 -; SI-NEXT: v_readfirstlane_b32 s9, v6 -; SI-NEXT: v_readfirstlane_b32 s6, v7 -; SI-NEXT: v_readfirstlane_b32 s7, v8 -; SI-NEXT: v_readfirstlane_b32 s4, v9 -; SI-NEXT: s_and_b64 s[14:15], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s5, v10 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -34319,6 +34839,18 @@ define inreg <48 x half> @bitcast_v12f64_to_v48f16_scalar(<12 x double> inreg %a ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; SI-NEXT: v_readfirstlane_b32 s12, v1 +; SI-NEXT: v_readfirstlane_b32 s13, v2 +; SI-NEXT: v_readfirstlane_b32 s10, v3 +; SI-NEXT: v_readfirstlane_b32 s11, v4 +; SI-NEXT: v_readfirstlane_b32 s8, v5 +; SI-NEXT: v_readfirstlane_b32 s9, v6 +; SI-NEXT: v_readfirstlane_b32 s6, v7 +; SI-NEXT: v_readfirstlane_b32 s7, v8 +; SI-NEXT: v_readfirstlane_b32 s4, v9 +; SI-NEXT: s_and_b64 s[14:15], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v10 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s14, s5, 16 @@ -37274,6 +37806,10 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX9-LABEL: bitcast_v48f16_to_v12f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v9 ; GFX9-NEXT: v_mov_b32_e32 v33, v8 ; GFX9-NEXT: v_mov_b32_e32 v34, v7 @@ -37299,10 +37835,6 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v51, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v34 @@ -37426,89 +37958,165 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 ; GFX11-TRUE16-NEXT: s_clause 0xe ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll 
b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll index 07cdbef82d892..282e7a7953de6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll @@ -2570,12 +2570,12 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v26i32_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -2866,11 +2866,11 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v26i32_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -3047,11 +3047,11 @@ define <52 x i16> @bitcast_v26i32_to_v52i16(<26 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26i32_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -3412,15 +3412,16 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v14, s30, 0 -; SI-NEXT: v_writelane_b32 v14, s31, 1 -; SI-NEXT: v_writelane_b32 v14, s34, 2 -; SI-NEXT: v_writelane_b32 v14, s35, 3 -; SI-NEXT: v_writelane_b32 v14, s36, 4 -; SI-NEXT: v_writelane_b32 v14, s37, 5 -; SI-NEXT: v_writelane_b32 v14, s38, 6 +; SI-NEXT: v_writelane_b32 v14, s34, 0 +; SI-NEXT: v_writelane_b32 v14, s35, 1 +; SI-NEXT: v_writelane_b32 v14, s36, 2 +; SI-NEXT: v_writelane_b32 v14, s37, 3 +; SI-NEXT: v_writelane_b32 v14, s38, 4 +; SI-NEXT: v_writelane_b32 v14, s39, 5 +; SI-NEXT: v_writelane_b32 v14, s48, 6 +; SI-NEXT: v_writelane_b32 v14, s30, 7 +; SI-NEXT: v_writelane_b32 v14, s31, 8 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_writelane_b32 v14, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 ; SI-NEXT: v_readfirstlane_b32 s12, v3 @@ -3434,7 +3435,6 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v11 ; SI-NEXT: s_and_b64 s[40:41], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v12 -; SI-NEXT: v_writelane_b32 v14, s48, 8 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s92, s5, 16 @@ -3693,16 +3693,16 @@ define inreg <52 x i16> @bitcast_v26i32_to_v52i16_scalar(<26 x i32> inreg %a, i3 ; 
SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x64, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v14, 7 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s48, v14, 8 -; SI-NEXT: v_readlane_b32 s39, v14, 7 -; SI-NEXT: v_readlane_b32 s38, v14, 6 -; SI-NEXT: v_readlane_b32 s37, v14, 5 -; SI-NEXT: v_readlane_b32 s36, v14, 4 -; SI-NEXT: v_readlane_b32 s35, v14, 3 -; SI-NEXT: v_readlane_b32 s34, v14, 2 -; SI-NEXT: v_readlane_b32 s31, v14, 1 -; SI-NEXT: v_readlane_b32 s30, v14, 0 +; SI-NEXT: v_readlane_b32 s31, v14, 8 +; SI-NEXT: v_readlane_b32 s48, v14, 6 +; SI-NEXT: v_readlane_b32 s39, v14, 5 +; SI-NEXT: v_readlane_b32 s38, v14, 4 +; SI-NEXT: v_readlane_b32 s37, v14, 3 +; SI-NEXT: v_readlane_b32 s36, v14, 2 +; SI-NEXT: v_readlane_b32 s35, v14, 1 +; SI-NEXT: v_readlane_b32 s34, v14, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v14, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -6114,6 +6114,14 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v52i16_to_v26i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 
v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -6141,14 +6149,6 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -6286,90 +6286,167 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 
offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -6716,16 +6793,7 @@ end: define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v26i32_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -6746,6 +6814,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -6800,6 +6873,10 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -7290,11 +7367,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v26i32_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 
vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -7471,11 +7548,11 @@ define <52 x half> @bitcast_v26i32_to_v52f16(<26 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26i32_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -7832,6 +7909,11 @@ define inreg <52 x half> @bitcast_v26i32_to_v52f16_scalar(<26 x i32> inreg %a, i ; SI-LABEL: bitcast_v26i32_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_readfirstlane_b32 s41, v1 ; SI-NEXT: v_readfirstlane_b32 s40, v2 @@ -7846,11 +7928,6 @@ define inreg 
<52 x half> @bitcast_v26i32_to_v52f16_scalar(<26 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v11 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v12 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -10938,6 +11015,14 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v26i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -10965,14 +11050,6 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 
4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -11112,90 +11189,167 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 
offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 
offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -13185,12 +13339,12 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v26f32_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -13481,11 +13635,11 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v26f32_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -13662,11 +13816,11 @@ define <52 x i16> @bitcast_v26f32_to_v52i16(<26 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26f32_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ 
-13997,6 +14151,14 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; SI-LABEL: bitcast_v26f32_to_v52i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_mov_b32_e32 v25, s16 ; SI-NEXT: v_mov_b32_e32 v26, s17 @@ -14013,14 +14175,6 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v13, s28 ; SI-NEXT: v_mov_b32_e32 v14, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[27:28], v[11:12], 16 @@ -14314,6 
+14468,10 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; VI-LABEL: bitcast_v26f32_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v22, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -14330,10 +14488,6 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v19, s27 ; VI-NEXT: v_mov_b32_e32 v12, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -14519,6 +14673,10 @@ define inreg <52 x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; GFX9-LABEL: bitcast_v26f32_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v22, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -14535,10 +14693,6 @@ define inreg <52 
x i16> @bitcast_v26f32_to_v52i16_scalar(<26 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v19, s27 ; GFX9-NEXT: v_mov_b32_e32 v12, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -16849,6 +17003,14 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX9-LABEL: bitcast_v52i16_to_v26f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -16876,14 +17038,6 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 
offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -17021,90 +17175,167 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, 
s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -17451,16 +17682,7 @@ end: define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v26f32_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -17481,6 +17703,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -17535,6 +17762,10 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -18025,11 +18256,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v26f32_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; 
VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -18206,11 +18437,11 @@ define <52 x half> @bitcast_v26f32_to_v52f16(<26 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v26f32_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -18541,20 +18772,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; SI-LABEL: bitcast_v26f32_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_readfirstlane_b32 s41, v1 -; SI-NEXT: v_readfirstlane_b32 s40, v2 -; SI-NEXT: v_readfirstlane_b32 s15, v3 -; SI-NEXT: v_readfirstlane_b32 s14, v4 -; SI-NEXT: v_readfirstlane_b32 s13, v5 -; SI-NEXT: v_readfirstlane_b32 s12, v6 -; SI-NEXT: v_readfirstlane_b32 s11, v7 -; SI-NEXT: v_readfirstlane_b32 s10, v8 -; SI-NEXT: v_readfirstlane_b32 s8, v9 -; SI-NEXT: v_readfirstlane_b32 s7, v10 -; SI-NEXT: v_readfirstlane_b32 s6, v11 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v12 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -18571,6 +18788,20 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; SI-NEXT: 
buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; SI-NEXT: v_readfirstlane_b32 s41, v1 +; SI-NEXT: v_readfirstlane_b32 s40, v2 +; SI-NEXT: v_readfirstlane_b32 s15, v3 +; SI-NEXT: v_readfirstlane_b32 s14, v4 +; SI-NEXT: v_readfirstlane_b32 s13, v5 +; SI-NEXT: v_readfirstlane_b32 s12, v6 +; SI-NEXT: v_readfirstlane_b32 s11, v7 +; SI-NEXT: v_readfirstlane_b32 s10, v8 +; SI-NEXT: v_readfirstlane_b32 s8, v9 +; SI-NEXT: v_readfirstlane_b32 s7, v10 +; SI-NEXT: v_readfirstlane_b32 s6, v11 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v12 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -19022,6 +19253,10 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; VI-LABEL: bitcast_v26f32_to_v52f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v22, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -19038,10 +19273,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v19, s27 ; VI-NEXT: v_mov_b32_e32 v12, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -19227,6 +19458,10 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; GFX9-LABEL: bitcast_v26f32_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v22, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -19243,10 +19478,6 @@ define inreg <52 x half> @bitcast_v26f32_to_v52f16_scalar(<26 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v19, s27 ; GFX9-NEXT: v_mov_b32_e32 v12, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v38, 16, v11 @@ -21831,6 +22062,14 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX9-LABEL: bitcast_v52f16_to_v26f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 
4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -21858,14 +22097,6 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -22005,90 +22236,167 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 
offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -23238,12 +23546,12 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v13i64_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -23534,11 +23842,11 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v13i64_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: 
$vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -23715,11 +24023,11 @@ define <52 x i16> @bitcast_v13i64_to_v52i16(<13 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13i64_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -24094,15 +24402,16 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v14, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v14, s30, 0 -; SI-NEXT: v_writelane_b32 v14, s31, 1 -; SI-NEXT: v_writelane_b32 v14, s34, 2 -; SI-NEXT: v_writelane_b32 v14, s35, 3 -; SI-NEXT: v_writelane_b32 v14, s36, 4 -; SI-NEXT: v_writelane_b32 v14, s37, 5 -; SI-NEXT: v_writelane_b32 v14, s38, 6 +; SI-NEXT: v_writelane_b32 v14, s34, 0 +; SI-NEXT: v_writelane_b32 v14, s35, 1 +; SI-NEXT: v_writelane_b32 v14, s36, 2 +; SI-NEXT: v_writelane_b32 v14, s37, 3 +; SI-NEXT: v_writelane_b32 v14, s38, 4 +; SI-NEXT: v_writelane_b32 v14, s39, 5 +; SI-NEXT: v_writelane_b32 v14, s48, 6 +; SI-NEXT: v_writelane_b32 v14, s30, 7 +; SI-NEXT: v_writelane_b32 v14, s31, 8 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_writelane_b32 v14, s39, 7 ; SI-NEXT: v_readfirstlane_b32 s14, v1 ; SI-NEXT: v_readfirstlane_b32 s15, v2 ; SI-NEXT: v_readfirstlane_b32 s12, v3 @@ -24116,7 +24425,6 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 
; SI-NEXT: v_readfirstlane_b32 s4, v11 ; SI-NEXT: s_and_b64 s[40:41], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v12 -; SI-NEXT: v_writelane_b32 v14, s48, 8 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s92, s5, 16 @@ -24375,16 +24683,16 @@ define inreg <52 x i16> @bitcast_v13i64_to_v52i16_scalar(<13 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x64, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v14, 7 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s48, v14, 8 -; SI-NEXT: v_readlane_b32 s39, v14, 7 -; SI-NEXT: v_readlane_b32 s38, v14, 6 -; SI-NEXT: v_readlane_b32 s37, v14, 5 -; SI-NEXT: v_readlane_b32 s36, v14, 4 -; SI-NEXT: v_readlane_b32 s35, v14, 3 -; SI-NEXT: v_readlane_b32 s34, v14, 2 -; SI-NEXT: v_readlane_b32 s31, v14, 1 -; SI-NEXT: v_readlane_b32 s30, v14, 0 +; SI-NEXT: v_readlane_b32 s31, v14, 8 +; SI-NEXT: v_readlane_b32 s48, v14, 6 +; SI-NEXT: v_readlane_b32 s39, v14, 5 +; SI-NEXT: v_readlane_b32 s38, v14, 4 +; SI-NEXT: v_readlane_b32 s37, v14, 3 +; SI-NEXT: v_readlane_b32 s36, v14, 2 +; SI-NEXT: v_readlane_b32 s35, v14, 1 +; SI-NEXT: v_readlane_b32 s34, v14, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v14, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -26796,6 +27104,14 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v52i16_to_v13i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -26823,14 +27139,6 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -26968,90 +27276,167 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -27398,16 +27783,7 @@ end: define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v13i64_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -27428,6 +27804,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -27482,6 +27863,10 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: 
$vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -27973,11 +28358,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v13i64_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -28154,11 +28539,11 @@ define <52 x half> @bitcast_v13i64_to_v52f16(<13 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13i64_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -28529,6 +28914,11 @@ define inreg <52 x half> @bitcast_v13i64_to_v52f16_scalar(<13 x i64> inreg %a, i ; SI-LABEL: bitcast_v13i64_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 @@ -28543,11 +28933,6 @@ define inreg <52 x half> @bitcast_v13i64_to_v52f16_scalar(<13 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v11 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v12 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -31635,6 +32020,14 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v13i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, 
off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -31662,14 +32055,6 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -31809,90 +32194,167 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 
offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 
v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -32240,12 +32702,12 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v13f64_to_v52i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -32523,11 +32985,11 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v13f64_to_v52i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -32691,11 +33153,11 @@ define <52 x i16> @bitcast_v13f64_to_v52i16(<13 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13f64_to_v52i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill 
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -33013,6 +33475,14 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; SI-LABEL: bitcast_v13f64_to_v52i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 ; SI-NEXT: v_mov_b32_e32 v25, s16 ; SI-NEXT: v_mov_b32_e32 v26, s17 @@ -33029,14 +33499,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v13, s28 ; SI-NEXT: v_mov_b32_e32 v14, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[27:28], v[11:12], 16 @@ -33317,6 +33779,10 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; VI-LABEL: bitcast_v13f64_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v21, s16 ; VI-NEXT: v_mov_b32_e32 v22, s17 @@ -33333,10 +33799,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v15, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -33509,6 +33971,10 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; GFX9-LABEL: bitcast_v13f64_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded 
Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v21, s16 ; GFX9-NEXT: v_mov_b32_e32 v22, s17 @@ -33525,10 +33991,6 @@ define inreg <52 x i16> @bitcast_v13f64_to_v52i16_scalar(<13 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v15, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -35826,6 +36288,14 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX9-LABEL: bitcast_v52i16_to_v13f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: 
v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -35853,14 +36323,6 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ -35998,90 +36460,167 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, 
s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 
offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, 
s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -36428,16 +36967,7 @@ end: define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v13f64_to_v52f16: ; SI: ; %bb.0: -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 -; SI-NEXT: ; implicit-def: $vgpr51 -; SI-NEXT: ; implicit-def: $vgpr50 -; SI-NEXT: ; kill: killed $vgpr51 -; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -36458,6 +36988,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v27 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr49 ; SI-NEXT: ; implicit-def: $vgpr38 @@ -36512,6 +37047,10 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr50 ; SI-NEXT: ; kill: killed $vgpr51 +; SI-NEXT: ; kill: killed $vgpr50 +; SI-NEXT: ; implicit-def: $vgpr51 +; SI-NEXT: ; implicit-def: $vgpr50 +; SI-NEXT: ; kill: killed $vgpr51 ; SI-NEXT: ; implicit-def: $vgpr51 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -36976,11 +37515,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v13f64_to_v52f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; VI-NEXT: ; implicit-def: $vgpr43 ; VI-NEXT: ; implicit-def: $vgpr42 ; VI-NEXT: ; implicit-def: $vgpr41 @@ -37144,11 +37683,11 @@ define <52 x half> @bitcast_v13f64_to_v52f16(<13 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v13f64_to_v52f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GFX9-NEXT: ; implicit-def: $vgpr43 ; GFX9-NEXT: ; implicit-def: $vgpr42 ; GFX9-NEXT: ; implicit-def: $vgpr41 @@ -37466,20 +38005,6 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; SI-LABEL: bitcast_v13f64_to_v52f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 -; SI-NEXT: v_readfirstlane_b32 s14, v1 -; SI-NEXT: v_readfirstlane_b32 s15, v2 -; SI-NEXT: v_readfirstlane_b32 s12, v3 -; SI-NEXT: v_readfirstlane_b32 s13, v4 -; SI-NEXT: v_readfirstlane_b32 s10, v5 -; SI-NEXT: v_readfirstlane_b32 s11, v6 -; SI-NEXT: v_readfirstlane_b32 s8, v7 -; SI-NEXT: v_readfirstlane_b32 s9, v8 -; SI-NEXT: v_readfirstlane_b32 s6, v9 -; SI-NEXT: v_readfirstlane_b32 s7, v10 -; SI-NEXT: v_readfirstlane_b32 s4, v11 -; SI-NEXT: s_and_b64 s[40:41], vcc, exec -; 
SI-NEXT: v_readfirstlane_b32 s5, v12 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -37496,6 +38021,20 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; SI-NEXT: v_readfirstlane_b32 s14, v1 +; SI-NEXT: v_readfirstlane_b32 s15, v2 +; SI-NEXT: v_readfirstlane_b32 s12, v3 +; SI-NEXT: v_readfirstlane_b32 s13, v4 +; SI-NEXT: v_readfirstlane_b32 s10, v5 +; SI-NEXT: v_readfirstlane_b32 s11, v6 +; SI-NEXT: v_readfirstlane_b32 s8, v7 +; SI-NEXT: v_readfirstlane_b32 s9, v8 +; SI-NEXT: v_readfirstlane_b32 s6, v9 +; SI-NEXT: v_readfirstlane_b32 s7, v10 +; SI-NEXT: v_readfirstlane_b32 s4, v11 +; SI-NEXT: s_and_b64 s[40:41], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v12 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s40, s5, 16 @@ -37934,6 +38473,10 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; VI-LABEL: bitcast_v13f64_to_v52f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: v_mov_b32_e32 v21, s16 ; VI-NEXT: v_mov_b32_e32 v22, s17 @@ -37950,10 +38493,6 @@ define inreg <52 x half> 
@bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v15, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -38126,6 +38665,10 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; GFX9-LABEL: bitcast_v13f64_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: v_mov_b32_e32 v21, s16 ; GFX9-NEXT: v_mov_b32_e32 v22, s17 @@ -38142,10 +38685,6 @@ define inreg <52 x half> @bitcast_v13f64_to_v52f16_scalar(<13 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v15, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v11 @@ -40717,6 +41256,14 @@ define inreg <13 x double> 
@bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX9-LABEL: bitcast_v52f16_to_v13f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v11 ; GFX9-NEXT: v_mov_b32_e32 v33, v10 ; GFX9-NEXT: v_mov_b32_e32 v34, v9 @@ -40744,14 +41291,6 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v52, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v53, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v34 @@ 
-40891,90 +41430,167 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 @@ -43549,6 +44165,10 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v52i16_to_v52f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -43577,10 +44197,6 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -45783,6 +46399,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; VI-LABEL: bitcast_v52f16_to_v52i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -45811,10 +46431,6 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -45979,6 +46595,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX9-LABEL: bitcast_v52f16_to_v52i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -46007,10 +46627,6 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll index 8eb71e90f8504..f6ff5be918706 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll @@ -2719,7 +2719,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v28i32_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -2729,6 +2728,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, 
off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -3045,7 +3045,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v28i32_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -3054,6 +3053,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -3246,7 +3246,6 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28i32_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -3255,6 +3254,7 @@ define <56 x i16> @bitcast_v28i32_to_v56i16(<28 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; 
GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -3641,20 +3641,21 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v16, s30, 0 -; SI-NEXT: v_writelane_b32 v16, s31, 1 -; SI-NEXT: v_writelane_b32 v16, s34, 2 -; SI-NEXT: v_writelane_b32 v16, s35, 3 -; SI-NEXT: v_writelane_b32 v16, s36, 4 -; SI-NEXT: v_writelane_b32 v16, s37, 5 -; SI-NEXT: v_writelane_b32 v16, s38, 6 -; SI-NEXT: v_writelane_b32 v16, s39, 7 -; SI-NEXT: v_writelane_b32 v16, s48, 8 -; SI-NEXT: v_writelane_b32 v16, s49, 9 -; SI-NEXT: v_writelane_b32 v16, s50, 10 -; SI-NEXT: v_writelane_b32 v16, s51, 11 +; SI-NEXT: v_writelane_b32 v16, s34, 0 +; SI-NEXT: v_writelane_b32 v16, s35, 1 +; SI-NEXT: v_writelane_b32 v16, s36, 2 +; SI-NEXT: v_writelane_b32 v16, s37, 3 +; SI-NEXT: v_writelane_b32 v16, s38, 4 +; SI-NEXT: v_writelane_b32 v16, s39, 5 +; SI-NEXT: v_writelane_b32 v16, s48, 6 +; SI-NEXT: v_writelane_b32 v16, s49, 7 +; SI-NEXT: v_writelane_b32 v16, s50, 8 +; SI-NEXT: v_writelane_b32 v16, s51, 9 +; SI-NEXT: v_writelane_b32 v16, s52, 10 +; SI-NEXT: v_writelane_b32 v16, s53, 11 +; SI-NEXT: v_writelane_b32 v16, s30, 12 +; SI-NEXT: v_writelane_b32 v16, s31, 13 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_writelane_b32 v16, s52, 12 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 ; SI-NEXT: v_readfirstlane_b32 s14, v3 @@ -3670,7 +3671,6 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v13 ; SI-NEXT: s_and_b64 s[42:43], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v14 -; SI-NEXT: v_writelane_b32 v16, s53, 13 ; SI-NEXT: 
s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s30, s5, 16 @@ -3950,21 +3950,21 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x6c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v16, 12 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s53, v16, 13 -; SI-NEXT: v_readlane_b32 s52, v16, 12 -; SI-NEXT: v_readlane_b32 s51, v16, 11 -; SI-NEXT: v_readlane_b32 s50, v16, 10 -; SI-NEXT: v_readlane_b32 s49, v16, 9 -; SI-NEXT: v_readlane_b32 s48, v16, 8 -; SI-NEXT: v_readlane_b32 s39, v16, 7 -; SI-NEXT: v_readlane_b32 s38, v16, 6 -; SI-NEXT: v_readlane_b32 s37, v16, 5 -; SI-NEXT: v_readlane_b32 s36, v16, 4 -; SI-NEXT: v_readlane_b32 s35, v16, 3 -; SI-NEXT: v_readlane_b32 s34, v16, 2 -; SI-NEXT: v_readlane_b32 s31, v16, 1 -; SI-NEXT: v_readlane_b32 s30, v16, 0 +; SI-NEXT: v_readlane_b32 s31, v16, 13 +; SI-NEXT: v_readlane_b32 s53, v16, 11 +; SI-NEXT: v_readlane_b32 s52, v16, 10 +; SI-NEXT: v_readlane_b32 s51, v16, 9 +; SI-NEXT: v_readlane_b32 s50, v16, 8 +; SI-NEXT: v_readlane_b32 s49, v16, 7 +; SI-NEXT: v_readlane_b32 s48, v16, 6 +; SI-NEXT: v_readlane_b32 s39, v16, 5 +; SI-NEXT: v_readlane_b32 s38, v16, 4 +; SI-NEXT: v_readlane_b32 s37, v16, 3 +; SI-NEXT: v_readlane_b32 s36, v16, 2 +; SI-NEXT: v_readlane_b32 s35, v16, 1 +; SI-NEXT: v_readlane_b32 s34, v16, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -4007,10 +4007,11 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 
0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -4026,7 +4027,6 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -4200,6 +4200,7 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -4228,10 +4229,9 @@ define inreg <56 x i16> @bitcast_v28i32_to_v56i16_scalar(<28 x i32> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -6585,6 +6585,18 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v56i16_to_v28i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, 
s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -6614,18 +6626,6 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded 
Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -6779,90 +6779,167 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -7224,6 +7301,22 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v28i32_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded 
Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -7266,22 +7359,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill 
-; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -7867,7 +7944,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v28i32_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -7876,6 +7952,7 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -8068,7 +8145,6 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28i32_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -8077,6 +8153,7 @@ define <56 x half> @bitcast_v28i32_to_v56f16(<28 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -8459,6 +8536,15 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; SI-LABEL: bitcast_v28i32_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_readfirstlane_b32 s43, v1 ; SI-NEXT: v_readfirstlane_b32 s42, v2 @@ -8475,15 +8561,6 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v13 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v14 -; SI-NEXT: buffer_store_dword v40, off, 
s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -8964,10 +9041,11 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -8983,7 +9061,6 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB17_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -9157,6 +9234,7 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: s_and_b32 s7, 
0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -9185,10 +9263,9 @@ define inreg <56 x half> @bitcast_v28i32_to_v56f16_scalar(<28 x i32> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -11847,6 +11924,18 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v28i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 
4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -11876,18 +11965,6 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -12043,90 +12120,167 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 
offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 
+; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -14225,7 +14379,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v28f32_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -14235,6 +14388,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -14551,7 +14705,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v28f32_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], 
s32 offset:20 ; 4-byte Folded Spill @@ -14560,6 +14713,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -14752,7 +14906,6 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28f32_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -14761,6 +14914,7 @@ define <56 x i16> @bitcast_v28f32_to_v56i16(<28 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -15115,6 +15269,18 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; SI-LABEL: bitcast_v28f32_to_v56i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; 
SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -15131,18 +15297,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v15, s28 ; SI-NEXT: v_mov_b32_e32 v16, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[29:30], v[13:14], 16 @@ -15460,6 +15614,14 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; VI-LABEL: bitcast_v28f32_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v20, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -15476,14 +15638,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v14, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -15687,6 +15841,14 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; GFX9-LABEL: bitcast_v28f32_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v20, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -15703,14 +15865,6 @@ define inreg <56 x i16> @bitcast_v28f32_to_v56i16_scalar(<28 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v14, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte 
Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -18210,6 +18364,18 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX9-LABEL: bitcast_v56i16_to_v28f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -18239,18 +18405,6 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, 
s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -18404,90 +18558,167 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; 
GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -18849,6 +19080,22 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 
x float> %a, i32 %b) { ; SI-LABEL: bitcast_v28f32_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -18891,22 +19138,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, 
s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -19492,7 +19723,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v28f32_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -19501,6 +19731,7 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; 
VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -19693,7 +19924,6 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v28f32_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -19702,6 +19932,7 @@ define <56 x half> @bitcast_v28f32_to_v56f16(<28 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -20056,22 +20287,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; SI-LABEL: bitcast_v28f32_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_readfirstlane_b32 s43, v1 -; SI-NEXT: v_readfirstlane_b32 s42, v2 -; SI-NEXT: v_readfirstlane_b32 s41, v3 -; SI-NEXT: v_readfirstlane_b32 s40, v4 -; SI-NEXT: v_readfirstlane_b32 s15, v5 -; SI-NEXT: v_readfirstlane_b32 s14, v6 -; SI-NEXT: v_readfirstlane_b32 s13, v7 -; SI-NEXT: v_readfirstlane_b32 s12, v8 -; SI-NEXT: v_readfirstlane_b32 s11, v9 -; SI-NEXT: v_readfirstlane_b32 s10, v10 -; SI-NEXT: v_readfirstlane_b32 s8, v11 -; SI-NEXT: v_readfirstlane_b32 s7, v12 -; SI-NEXT: v_readfirstlane_b32 s6, v13 
-; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s9, v14 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -20088,6 +20303,22 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; SI-NEXT: v_readfirstlane_b32 s43, v1 +; SI-NEXT: v_readfirstlane_b32 s42, v2 +; SI-NEXT: v_readfirstlane_b32 s41, v3 +; SI-NEXT: v_readfirstlane_b32 s40, v4 +; SI-NEXT: v_readfirstlane_b32 s15, v5 +; SI-NEXT: v_readfirstlane_b32 s14, v6 +; SI-NEXT: v_readfirstlane_b32 s13, v7 +; SI-NEXT: v_readfirstlane_b32 s12, v8 +; SI-NEXT: v_readfirstlane_b32 s11, v9 +; SI-NEXT: v_readfirstlane_b32 s10, v10 +; SI-NEXT: v_readfirstlane_b32 s8, v11 +; SI-NEXT: v_readfirstlane_b32 s7, v12 +; SI-NEXT: v_readfirstlane_b32 s6, v13 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s9, v14 ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -20575,6 +20806,14 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; VI-LABEL: bitcast_v28f32_to_v56f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v20, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -20591,14 +20830,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v14, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -20802,6 +21033,14 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; GFX9-LABEL: bitcast_v28f32_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v20, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -20818,14 +21057,6 @@ define inreg <56 x half> @bitcast_v28f32_to_v56f16_scalar(<28 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v14, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v50, 16, v13 @@ -23630,6 +23861,18 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX9-LABEL: bitcast_v56f16_to_v28f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 
; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -23659,18 +23902,6 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -23826,90 +24057,167 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -25120,7 +25428,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v14i64_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -25130,6 +25437,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 
4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -25446,7 +25754,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v14i64_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -25455,6 +25762,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -25647,7 +25955,6 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14i64_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -25656,6 +25963,7 @@ define <56 x i16> @bitcast_v14i64_to_v56i16(<14 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -26056,20 +26364,21 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v16, s30, 0 -; SI-NEXT: v_writelane_b32 v16, s31, 1 -; SI-NEXT: v_writelane_b32 v16, s34, 2 -; SI-NEXT: v_writelane_b32 v16, s35, 3 -; SI-NEXT: v_writelane_b32 v16, s36, 4 -; SI-NEXT: v_writelane_b32 v16, s37, 5 -; SI-NEXT: v_writelane_b32 v16, s38, 6 -; SI-NEXT: v_writelane_b32 v16, s39, 7 -; SI-NEXT: v_writelane_b32 v16, s48, 8 -; SI-NEXT: v_writelane_b32 v16, s49, 9 -; SI-NEXT: v_writelane_b32 v16, s50, 10 -; SI-NEXT: v_writelane_b32 v16, s51, 11 +; SI-NEXT: v_writelane_b32 v16, s34, 0 +; SI-NEXT: v_writelane_b32 v16, s35, 1 +; SI-NEXT: v_writelane_b32 v16, s36, 2 +; SI-NEXT: v_writelane_b32 v16, s37, 3 +; SI-NEXT: v_writelane_b32 v16, s38, 4 +; SI-NEXT: v_writelane_b32 v16, s39, 5 +; SI-NEXT: v_writelane_b32 v16, s48, 6 +; SI-NEXT: v_writelane_b32 v16, s49, 7 +; SI-NEXT: v_writelane_b32 v16, s50, 8 +; SI-NEXT: v_writelane_b32 v16, s51, 9 +; SI-NEXT: v_writelane_b32 v16, s52, 10 +; SI-NEXT: v_writelane_b32 v16, s53, 11 +; SI-NEXT: v_writelane_b32 v16, s30, 12 +; SI-NEXT: v_writelane_b32 v16, s31, 13 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_writelane_b32 v16, s52, 12 ; SI-NEXT: v_readfirstlane_b32 s40, v1 ; SI-NEXT: v_readfirstlane_b32 s41, v2 ; SI-NEXT: v_readfirstlane_b32 s14, v3 @@ -26085,7 +26394,6 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v13 ; SI-NEXT: s_and_b64 s[42:43], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v14 -; SI-NEXT: v_writelane_b32 v16, s53, 13 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s30, s5, 16 @@ 
-26365,21 +26673,21 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x6c, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v16, 12 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s53, v16, 13 -; SI-NEXT: v_readlane_b32 s52, v16, 12 -; SI-NEXT: v_readlane_b32 s51, v16, 11 -; SI-NEXT: v_readlane_b32 s50, v16, 10 -; SI-NEXT: v_readlane_b32 s49, v16, 9 -; SI-NEXT: v_readlane_b32 s48, v16, 8 -; SI-NEXT: v_readlane_b32 s39, v16, 7 -; SI-NEXT: v_readlane_b32 s38, v16, 6 -; SI-NEXT: v_readlane_b32 s37, v16, 5 -; SI-NEXT: v_readlane_b32 s36, v16, 4 -; SI-NEXT: v_readlane_b32 s35, v16, 3 -; SI-NEXT: v_readlane_b32 s34, v16, 2 -; SI-NEXT: v_readlane_b32 s31, v16, 1 -; SI-NEXT: v_readlane_b32 s30, v16, 0 +; SI-NEXT: v_readlane_b32 s31, v16, 13 +; SI-NEXT: v_readlane_b32 s53, v16, 11 +; SI-NEXT: v_readlane_b32 s52, v16, 10 +; SI-NEXT: v_readlane_b32 s51, v16, 9 +; SI-NEXT: v_readlane_b32 s50, v16, 8 +; SI-NEXT: v_readlane_b32 s49, v16, 7 +; SI-NEXT: v_readlane_b32 s48, v16, 6 +; SI-NEXT: v_readlane_b32 s39, v16, 5 +; SI-NEXT: v_readlane_b32 s38, v16, 4 +; SI-NEXT: v_readlane_b32 s37, v16, 3 +; SI-NEXT: v_readlane_b32 s36, v16, 2 +; SI-NEXT: v_readlane_b32 s35, v16, 1 +; SI-NEXT: v_readlane_b32 s34, v16, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -26422,10 +26730,11 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; 
VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -26441,7 +26750,6 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB41_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -26615,6 +26923,7 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -26643,10 +26952,9 @@ define inreg <56 x i16> @bitcast_v14i64_to_v56i16_scalar(<14 x i64> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -29000,6 +29308,18 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v56i16_to_v14i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword 
v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -29029,18 +29349,6 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 
4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -29194,90 +29502,167 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: 
; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: 
s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -29639,6 +30024,22 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v14i64_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte 
Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -29681,22 +30082,6 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte 
Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr40 ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr54 @@ -30282,7 +30667,6 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v14i64_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -30291,6 +30675,7 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -30483,7 +30868,6 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14i64_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded 
Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -30492,6 +30876,7 @@ define <56 x half> @bitcast_v14i64_to_v56f16(<14 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -30888,6 +31273,15 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; SI-LABEL: bitcast_v14i64_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 @@ -30904,15 +31298,6 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v13 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v14 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -31393,10 +31778,11 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v28, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v28, s30, 0 -; VI-NEXT: v_writelane_b32 v28, s31, 1 +; VI-NEXT: v_writelane_b32 v28, s34, 0 +; VI-NEXT: v_writelane_b32 v28, s35, 1 +; VI-NEXT: v_writelane_b32 v28, s30, 2 +; VI-NEXT: v_writelane_b32 v28, s31, 3 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; VI-NEXT: v_writelane_b32 v28, s34, 2 ; VI-NEXT: v_readfirstlane_b32 s43, v0 ; VI-NEXT: v_readfirstlane_b32 s42, v1 ; VI-NEXT: v_readfirstlane_b32 s41, v2 @@ -31412,7 +31798,6 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v12 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v13 -; VI-NEXT: v_writelane_b32 v28, s35, 3 ; VI-NEXT: s_cbranch_scc0 .LBB45_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s44, s7, 16 @@ -31586,6 +31971,7 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s42, s44, 16 ; 
VI-NEXT: s_or_b32 s7, s7, s42 +; VI-NEXT: v_readlane_b32 s30, v28, 2 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -31614,10 +32000,9 @@ define inreg <56 x half> @bitcast_v14i64_to_v56f16_scalar(<14 x i64> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v25, s8 ; VI-NEXT: v_mov_b32_e32 v26, s6 ; VI-NEXT: v_mov_b32_e32 v27, s7 -; VI-NEXT: v_readlane_b32 s35, v28, 3 -; VI-NEXT: v_readlane_b32 s34, v28, 2 -; VI-NEXT: v_readlane_b32 s31, v28, 1 -; VI-NEXT: v_readlane_b32 s30, v28, 0 +; VI-NEXT: v_readlane_b32 s31, v28, 3 +; VI-NEXT: v_readlane_b32 s35, v28, 1 +; VI-NEXT: v_readlane_b32 s34, v28, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v28, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -34276,6 +34661,18 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v14i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -34305,18 +34702,6 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -34472,90 +34857,167 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction 
; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -34917,7 +35379,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v14f64_to_v56i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill @@ -34927,6 +35388,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr44 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr42 @@ -35229,7 +35691,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v14f64_to_v56i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ 
-35238,6 +35699,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -35416,7 +35878,6 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14f64_to_v56i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -35425,6 +35886,7 @@ define <56 x i16> @bitcast_v14f64_to_v56i16(<14 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -35765,6 +36227,18 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; SI-LABEL: bitcast_v14f64_to_v56i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, 
s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -35781,18 +36255,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v15, s28 ; SI-NEXT: v_mov_b32_e32 v16, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword 
v59, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[29:30], v[13:14], 16 @@ -36096,6 +36558,14 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; VI-LABEL: bitcast_v14f64_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -36112,14 +36582,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v17, s28 ; VI-NEXT: v_mov_b32_e32 v18, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, 
s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -36309,6 +36771,14 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; GFX9-LABEL: bitcast_v14f64_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -36325,14 +36795,6 @@ define inreg <56 x i16> @bitcast_v14f64_to_v56i16_scalar(<14 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v17, s28 ; GFX9-NEXT: v_mov_b32_e32 v18, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -38818,6 +39280,18 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX9-LABEL: bitcast_v56i16_to_v14f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -38847,18 +39321,6 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 
offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -39012,90 +39474,167 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; 
GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -39457,6 +39996,22 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 
x double> %a, i32 %b) { ; SI-LABEL: bitcast_v14f64_to_v56f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v29 ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 @@ -39499,22 +40054,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr29 ; SI-NEXT: ; kill: killed $vgpr29 ; SI-NEXT: ; implicit-def: $vgpr29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, 
s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr55 ; SI-NEXT: ; implicit-def: $vgpr40 @@ -40071,7 +40610,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v14f64_to_v56f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -40080,6 +40618,7 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 
; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; VI-NEXT: ; implicit-def: $vgpr47 ; VI-NEXT: ; implicit-def: $vgpr46 ; VI-NEXT: ; implicit-def: $vgpr45 @@ -40258,7 +40797,6 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v14f64_to_v56f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -40267,6 +40805,7 @@ define <56 x half> @bitcast_v14f64_to_v56f16(<14 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v28 ; GFX9-NEXT: ; implicit-def: $vgpr47 ; GFX9-NEXT: ; implicit-def: $vgpr46 ; GFX9-NEXT: ; implicit-def: $vgpr45 @@ -40607,22 +41146,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; SI-LABEL: bitcast_v14f64_to_v56f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; SI-NEXT: v_readfirstlane_b32 s40, v1 -; SI-NEXT: v_readfirstlane_b32 s41, v2 -; SI-NEXT: v_readfirstlane_b32 s14, v3 -; SI-NEXT: v_readfirstlane_b32 s15, v4 -; SI-NEXT: v_readfirstlane_b32 s12, v5 -; SI-NEXT: v_readfirstlane_b32 s13, v6 -; SI-NEXT: v_readfirstlane_b32 s10, v7 -; SI-NEXT: v_readfirstlane_b32 s11, v8 -; SI-NEXT: v_readfirstlane_b32 s8, v9 -; SI-NEXT: v_readfirstlane_b32 s9, v10 -; SI-NEXT: v_readfirstlane_b32 s6, v11 -; SI-NEXT: v_readfirstlane_b32 s7, v12 -; SI-NEXT: v_readfirstlane_b32 s4, 
v13 -; SI-NEXT: s_and_b64 s[42:43], vcc, exec -; SI-NEXT: v_readfirstlane_b32 s5, v14 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -40639,6 +41162,22 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 +; SI-NEXT: v_readfirstlane_b32 s40, v1 +; SI-NEXT: v_readfirstlane_b32 s41, v2 +; SI-NEXT: v_readfirstlane_b32 s14, v3 +; SI-NEXT: v_readfirstlane_b32 s15, v4 +; SI-NEXT: v_readfirstlane_b32 s12, v5 +; SI-NEXT: v_readfirstlane_b32 s13, v6 +; SI-NEXT: v_readfirstlane_b32 s10, v7 +; SI-NEXT: v_readfirstlane_b32 s11, v8 +; SI-NEXT: v_readfirstlane_b32 s8, v9 +; SI-NEXT: v_readfirstlane_b32 s9, v10 +; SI-NEXT: v_readfirstlane_b32 s6, v11 +; SI-NEXT: v_readfirstlane_b32 s7, v12 +; SI-NEXT: v_readfirstlane_b32 s4, v13 +; SI-NEXT: s_and_b64 s[42:43], vcc, exec +; SI-NEXT: v_readfirstlane_b32 s5, v14 ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s42, s5, 16 @@ -41120,6 +41659,14 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; VI-LABEL: bitcast_v14f64_to_v56f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v20, s17 @@ -41136,14 +41683,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v17, s28 ; VI-NEXT: v_mov_b32_e32 v18, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -41333,6 +41872,14 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; GFX9-LABEL: bitcast_v14f64_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v20, s17 @@ -41349,14 +41896,6 @@ define inreg <56 x half> @bitcast_v14f64_to_v56f16_scalar(<14 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v17, s28 ; GFX9-NEXT: v_mov_b32_e32 v18, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v13 @@ -44147,6 +44686,18 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX9-LABEL: bitcast_v56f16_to_v14f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 
offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v13 ; GFX9-NEXT: v_mov_b32_e32 v33, v12 ; GFX9-NEXT: v_mov_b32_e32 v34, v11 @@ -44176,18 +44727,6 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v55, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v34 @@ -44343,90 +44882,167 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 @@ -47265,6 +47881,14 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v56i16_to_v56f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 
offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -47295,14 +47919,6 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -49736,6 +50352,14 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; VI-LABEL: bitcast_v56f16_to_v56i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: 
buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -49766,14 +50390,6 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -49952,6 +50568,14 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX9-LABEL: bitcast_v56f16_to_v56i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded 
Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -49982,14 +50606,6 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll index 93c11f13ce3ce..134980045bb53 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll @@ -2849,7 +2849,6 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v30i32_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill 
; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -2863,6 +2862,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -2892,7 +2892,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr37 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -3205,7 +3205,6 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v30i32_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -3218,6 +3217,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -3426,7 +3426,6 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> 
%a, i32 %b) { ; GFX9-LABEL: bitcast_v30i32_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -3439,6 +3438,7 @@ define <60 x i16> @bitcast_v30i32_to_v60i16(<30 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -3851,23 +3851,24 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v18, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v18, s30, 0 -; SI-NEXT: v_writelane_b32 v18, s31, 1 -; SI-NEXT: v_writelane_b32 v18, s34, 2 -; SI-NEXT: v_writelane_b32 v18, s35, 3 -; SI-NEXT: v_writelane_b32 v18, s36, 4 -; SI-NEXT: v_writelane_b32 v18, s37, 5 -; SI-NEXT: v_writelane_b32 v18, s38, 6 -; SI-NEXT: v_writelane_b32 v18, s39, 7 -; SI-NEXT: v_writelane_b32 v18, s48, 8 -; SI-NEXT: v_writelane_b32 v18, s49, 9 -; SI-NEXT: v_writelane_b32 v18, s50, 10 -; SI-NEXT: v_writelane_b32 v18, s51, 11 -; SI-NEXT: v_writelane_b32 v18, s52, 12 -; SI-NEXT: v_writelane_b32 v18, s53, 13 -; SI-NEXT: v_writelane_b32 v18, s54, 14 +; SI-NEXT: v_writelane_b32 v18, s34, 0 +; SI-NEXT: v_writelane_b32 v18, s35, 1 +; SI-NEXT: v_writelane_b32 v18, s36, 2 +; SI-NEXT: v_writelane_b32 v18, s37, 3 +; SI-NEXT: v_writelane_b32 v18, s38, 4 +; SI-NEXT: v_writelane_b32 v18, 
s39, 5 +; SI-NEXT: v_writelane_b32 v18, s48, 6 +; SI-NEXT: v_writelane_b32 v18, s49, 7 +; SI-NEXT: v_writelane_b32 v18, s50, 8 +; SI-NEXT: v_writelane_b32 v18, s51, 9 +; SI-NEXT: v_writelane_b32 v18, s52, 10 +; SI-NEXT: v_writelane_b32 v18, s53, 11 +; SI-NEXT: v_writelane_b32 v18, s54, 12 +; SI-NEXT: v_writelane_b32 v18, s55, 13 +; SI-NEXT: v_writelane_b32 v18, s64, 14 +; SI-NEXT: v_writelane_b32 v18, s30, 15 +; SI-NEXT: v_writelane_b32 v18, s31, 16 ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_writelane_b32 v18, s55, 15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 ; SI-NEXT: v_readfirstlane_b32 s40, v3 @@ -3885,7 +3886,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: v_writelane_b32 v18, s64, 16 ; SI-NEXT: s_cbranch_scc0 .LBB13_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s34, s5, 16 @@ -4183,24 +4183,24 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x74, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v18, 15 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s64, v18, 16 -; SI-NEXT: v_readlane_b32 s55, v18, 15 -; SI-NEXT: v_readlane_b32 s54, v18, 14 -; SI-NEXT: v_readlane_b32 s53, v18, 13 -; SI-NEXT: v_readlane_b32 s52, v18, 12 -; SI-NEXT: v_readlane_b32 s51, v18, 11 -; SI-NEXT: v_readlane_b32 s50, v18, 10 -; SI-NEXT: v_readlane_b32 s49, v18, 9 -; SI-NEXT: v_readlane_b32 s48, v18, 8 -; SI-NEXT: v_readlane_b32 s39, v18, 7 -; SI-NEXT: v_readlane_b32 s38, v18, 6 -; SI-NEXT: v_readlane_b32 s37, v18, 5 -; SI-NEXT: v_readlane_b32 s36, v18, 4 -; SI-NEXT: v_readlane_b32 s35, v18, 3 -; SI-NEXT: v_readlane_b32 s34, v18, 2 -; SI-NEXT: v_readlane_b32 s31, v18, 1 -; SI-NEXT: 
v_readlane_b32 s30, v18, 0 +; SI-NEXT: v_readlane_b32 s31, v18, 16 +; SI-NEXT: v_readlane_b32 s64, v18, 14 +; SI-NEXT: v_readlane_b32 s55, v18, 13 +; SI-NEXT: v_readlane_b32 s54, v18, 12 +; SI-NEXT: v_readlane_b32 s53, v18, 11 +; SI-NEXT: v_readlane_b32 s52, v18, 10 +; SI-NEXT: v_readlane_b32 s51, v18, 9 +; SI-NEXT: v_readlane_b32 s50, v18, 8 +; SI-NEXT: v_readlane_b32 s49, v18, 7 +; SI-NEXT: v_readlane_b32 s48, v18, 6 +; SI-NEXT: v_readlane_b32 s39, v18, 5 +; SI-NEXT: v_readlane_b32 s38, v18, 4 +; SI-NEXT: v_readlane_b32 s37, v18, 3 +; SI-NEXT: v_readlane_b32 s36, v18, 2 +; SI-NEXT: v_readlane_b32 s35, v18, 1 +; SI-NEXT: v_readlane_b32 s34, v18, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v18, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -4245,14 +4245,15 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -4270,7 +4271,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; 
VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB13_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -4456,6 +4456,7 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -4486,14 +4487,13 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -4538,10 +4538,11 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -4559,7 +4560,6 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB13_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -4685,6 +4685,7 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -4715,10 +4716,9 @@ define inreg <60 x i16> @bitcast_v30i32_to_v60i16_scalar(<30 x i32> inreg %a, i3 ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -7024,6 +7024,22 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v60i16_to_v30i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded 
Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -7055,22 +7071,6 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword 
v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -7240,90 +7240,167 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, 
v8 :: v_dual_mov_b32 v191, v5 @@ -7701,6 +7778,22 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-LABEL: bitcast_v30i32_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -7735,22 +7828,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -7784,7 +7861,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -8414,7 +8491,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; VI-LABEL: bitcast_v30i32_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -8427,6 +8503,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -8635,7 +8712,6 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30i32_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -8648,6 +8724,7 @@ define <60 x half> @bitcast_v30i32_to_v60f16(<30 x i32> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -9056,6 +9133,19 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; SI-LABEL: bitcast_v30i32_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 
4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s45, v1 ; SI-NEXT: v_readfirstlane_b32 s44, v2 @@ -9074,19 +9164,6 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, 
off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB17_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -9603,14 +9680,15 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -9628,7 +9706,6 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB17_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -9814,6 +9891,7 @@ define inreg <60 x half> 
@bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -9844,14 +9922,13 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -9896,10 +9973,11 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -9917,7 +9995,6 @@ define inreg <60 x half> 
@bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB17_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -10043,6 +10120,7 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -10073,10 +10151,9 @@ define inreg <60 x half> @bitcast_v30i32_to_v60f16_scalar(<30 x i32> inreg %a, i ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -12774,6 +12851,22 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v30i32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 
offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -12805,22 +12898,6 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -12992,90 +13069,167 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -15272,7 +15426,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v30f32_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
-; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -15286,6 +15439,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -15315,7 +15469,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr39 ; SI-NEXT: ; implicit-def: $vgpr37 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -15628,7 +15782,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; VI-LABEL: bitcast_v30f32_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -15641,6 +15794,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte 
Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -15849,7 +16003,6 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30f32_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -15862,6 +16015,7 @@ define <60 x i16> @bitcast_v30f32_to_v60i16(<30 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -16240,6 +16394,21 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; SI-LABEL: bitcast_v30f32_to_v60i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_mov_b32_e32 v27, s16 ; SI-NEXT: v_mov_b32_e32 v28, s17 @@ -16256,21 +16425,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_mov_b32_e32 v17, s28 ; SI-NEXT: v_mov_b32_e32 v18, s29 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; 
SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB29_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[31:32], v[15:16], 16 @@ -16611,6 +16765,18 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; VI-LABEL: bitcast_v30f32_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -16627,18 +16793,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v20, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: 
buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB29_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -16860,6 +17014,18 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; GFX9-LABEL: bitcast_v30f32_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 
offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -16876,18 +17042,6 @@ define inreg <60 x i16> @bitcast_v30f32_to_v60i16_scalar(<30 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v20, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB29_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -19562,6 +19716,22 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX9-LABEL: bitcast_v60i16_to_v30f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -19593,22 +19763,6 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], 
s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -19778,90 +19932,167 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: 
v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -20239,6 +20470,22 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-LABEL: bitcast_v30f32_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -20273,22 +20520,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: 
$vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -20322,7 +20553,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -20952,7 +21183,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, 
i32 %b) { ; VI-LABEL: bitcast_v30f32_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -20965,6 +21195,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -21173,7 +21404,6 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; GFX9-LABEL: bitcast_v30f32_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -21186,6 +21416,7 @@ define <60 x half> @bitcast_v30f32_to_v60f16(<30 x float> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -21564,6 +21795,22 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; 
SI-LABEL: bitcast_v30f32_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s45, v1 ; SI-NEXT: v_readfirstlane_b32 s44, v2 @@ -21582,22 +21829,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword 
v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB33_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -22130,6 +22361,18 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; VI-LABEL: bitcast_v30f32_to_v60f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: 
buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v19, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -22146,18 +22389,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; VI-NEXT: v_mov_b32_e32 v21, s27 ; VI-NEXT: v_mov_b32_e32 v20, s28 ; VI-NEXT: v_mov_b32_e32 v16, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB33_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -22379,6 +22610,18 @@ define inreg <60 x half> 
@bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; GFX9-LABEL: bitcast_v30f32_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v19, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -22395,18 +22638,6 @@ define inreg <60 x half> @bitcast_v30f32_to_v60f16_scalar(<30 x float> inreg %a, ; GFX9-NEXT: v_mov_b32_e32 v21, s27 ; GFX9-NEXT: v_mov_b32_e32 v20, s28 ; GFX9-NEXT: v_mov_b32_e32 v16, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB33_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -25473,6 +25704,22 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX9-LABEL: bitcast_v60f16_to_v30f32_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword 
v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -25504,22 +25751,6 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded 
Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -25691,90 +25922,167 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 
offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 
offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, 
v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -27041,7 +27349,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v15i64_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -27055,6 +27362,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, 
s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -27084,7 +27392,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -27397,7 +27705,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v15i64_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -27410,6 +27717,7 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -27618,7 +27926,6 @@ define <60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15i64_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -27631,6 +27938,7 @@ define 
<60 x i16> @bitcast_v15i64_to_v60i16(<15 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -28059,23 +28367,24 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v18, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_mov_b64 exec, s[4:5] ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_writelane_b32 v18, s30, 0 -; SI-NEXT: v_writelane_b32 v18, s31, 1 -; SI-NEXT: v_writelane_b32 v18, s34, 2 -; SI-NEXT: v_writelane_b32 v18, s35, 3 -; SI-NEXT: v_writelane_b32 v18, s36, 4 -; SI-NEXT: v_writelane_b32 v18, s37, 5 -; SI-NEXT: v_writelane_b32 v18, s38, 6 -; SI-NEXT: v_writelane_b32 v18, s39, 7 -; SI-NEXT: v_writelane_b32 v18, s48, 8 -; SI-NEXT: v_writelane_b32 v18, s49, 9 -; SI-NEXT: v_writelane_b32 v18, s50, 10 -; SI-NEXT: v_writelane_b32 v18, s51, 11 -; SI-NEXT: v_writelane_b32 v18, s52, 12 -; SI-NEXT: v_writelane_b32 v18, s53, 13 -; SI-NEXT: v_writelane_b32 v18, s54, 14 +; SI-NEXT: v_writelane_b32 v18, s34, 0 +; SI-NEXT: v_writelane_b32 v18, s35, 1 +; SI-NEXT: v_writelane_b32 v18, s36, 2 +; SI-NEXT: v_writelane_b32 v18, s37, 3 +; SI-NEXT: v_writelane_b32 v18, s38, 4 +; SI-NEXT: v_writelane_b32 v18, s39, 5 +; SI-NEXT: v_writelane_b32 v18, s48, 6 +; SI-NEXT: v_writelane_b32 v18, s49, 7 +; SI-NEXT: v_writelane_b32 v18, s50, 8 +; SI-NEXT: v_writelane_b32 v18, s51, 9 +; SI-NEXT: v_writelane_b32 v18, s52, 10 +; SI-NEXT: v_writelane_b32 v18, s53, 11 +; SI-NEXT: v_writelane_b32 v18, s54, 12 +; SI-NEXT: v_writelane_b32 v18, s55, 13 +; SI-NEXT: v_writelane_b32 v18, s64, 14 +; SI-NEXT: v_writelane_b32 v18, s30, 15 +; SI-NEXT: v_writelane_b32 v18, s31, 16 
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_writelane_b32 v18, s55, 15 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 ; SI-NEXT: v_readfirstlane_b32 s40, v3 @@ -28093,7 +28402,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: v_writelane_b32 v18, s64, 16 ; SI-NEXT: s_cbranch_scc0 .LBB41_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s34, s5, 16 @@ -28391,24 +28699,24 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x74, v0 ; SI-NEXT: v_mov_b32_e32 v1, s4 +; SI-NEXT: v_readlane_b32 s30, v18, 15 ; SI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; SI-NEXT: v_readlane_b32 s64, v18, 16 -; SI-NEXT: v_readlane_b32 s55, v18, 15 -; SI-NEXT: v_readlane_b32 s54, v18, 14 -; SI-NEXT: v_readlane_b32 s53, v18, 13 -; SI-NEXT: v_readlane_b32 s52, v18, 12 -; SI-NEXT: v_readlane_b32 s51, v18, 11 -; SI-NEXT: v_readlane_b32 s50, v18, 10 -; SI-NEXT: v_readlane_b32 s49, v18, 9 -; SI-NEXT: v_readlane_b32 s48, v18, 8 -; SI-NEXT: v_readlane_b32 s39, v18, 7 -; SI-NEXT: v_readlane_b32 s38, v18, 6 -; SI-NEXT: v_readlane_b32 s37, v18, 5 -; SI-NEXT: v_readlane_b32 s36, v18, 4 -; SI-NEXT: v_readlane_b32 s35, v18, 3 -; SI-NEXT: v_readlane_b32 s34, v18, 2 -; SI-NEXT: v_readlane_b32 s31, v18, 1 -; SI-NEXT: v_readlane_b32 s30, v18, 0 +; SI-NEXT: v_readlane_b32 s31, v18, 16 +; SI-NEXT: v_readlane_b32 s64, v18, 14 +; SI-NEXT: v_readlane_b32 s55, v18, 13 +; SI-NEXT: v_readlane_b32 s54, v18, 12 +; SI-NEXT: v_readlane_b32 s53, v18, 11 +; SI-NEXT: v_readlane_b32 s52, v18, 10 +; SI-NEXT: v_readlane_b32 s51, v18, 9 +; SI-NEXT: v_readlane_b32 s50, v18, 8 +; SI-NEXT: v_readlane_b32 s49, v18, 7 +; SI-NEXT: v_readlane_b32 s48, v18, 6 +; SI-NEXT: v_readlane_b32 s39, v18, 5 
+; SI-NEXT: v_readlane_b32 s38, v18, 4 +; SI-NEXT: v_readlane_b32 s37, v18, 3 +; SI-NEXT: v_readlane_b32 s36, v18, 2 +; SI-NEXT: v_readlane_b32 s35, v18, 1 +; SI-NEXT: v_readlane_b32 s34, v18, 0 ; SI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; SI-NEXT: buffer_load_dword v18, off, s[0:3], s32 ; 4-byte Folded Reload ; SI-NEXT: s_mov_b64 exec, s[4:5] @@ -28453,14 +28761,15 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -28478,7 +28787,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB41_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -28664,6 +28972,7 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 
; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -28694,14 +29003,13 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -28746,10 +29054,11 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -28767,7 +29076,6 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; 
GFX9-NEXT: s_cbranch_scc0 .LBB41_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -28893,6 +29201,7 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -28923,10 +29232,9 @@ define inreg <60 x i16> @bitcast_v15i64_to_v60i16_scalar(<15 x i64> inreg %a, i3 ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -31232,6 +31540,22 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX9-LABEL: bitcast_v60i16_to_v15i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -31263,22 +31587,6 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], 
s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -31448,90 +31756,167 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -31909,6 +32294,22 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-LABEL: bitcast_v15i64_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 
offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 @@ -31943,22 +32344,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; kill: killed $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr32 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, 
off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr57 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -31992,7 +32377,7 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr42 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -32623,7 +33008,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; VI-LABEL: bitcast_v15i64_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -32636,6 +33020,7 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, 
s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -32844,7 +33229,6 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15i64_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -32857,6 +33241,7 @@ define <60 x half> @bitcast_v15i64_to_v60f16(<15 x i64> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -33281,6 +33666,19 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; SI-LABEL: bitcast_v15i64_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; 
SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 ; SI-NEXT: v_readfirstlane_b32 s44, v1 ; SI-NEXT: v_readfirstlane_b32 s45, v2 @@ -33299,19 +33697,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; SI-NEXT: v_readfirstlane_b32 s6, v15 ; SI-NEXT: s_and_b64 s[4:5], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s9, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 ; 
4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB45_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s4, s9, 16 @@ -33828,14 +34213,15 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v30, s30, 0 -; VI-NEXT: v_writelane_b32 v30, s31, 1 -; VI-NEXT: v_writelane_b32 v30, s34, 2 -; VI-NEXT: v_writelane_b32 v30, s35, 3 -; VI-NEXT: v_writelane_b32 v30, s36, 4 -; VI-NEXT: v_writelane_b32 v30, s37, 5 +; VI-NEXT: v_writelane_b32 v30, s34, 0 +; VI-NEXT: v_writelane_b32 v30, s35, 1 +; VI-NEXT: v_writelane_b32 v30, s36, 2 +; VI-NEXT: v_writelane_b32 v30, s37, 3 +; VI-NEXT: v_writelane_b32 v30, s38, 4 +; VI-NEXT: v_writelane_b32 v30, s39, 5 +; VI-NEXT: v_writelane_b32 v30, s30, 6 +; VI-NEXT: v_writelane_b32 v30, s31, 7 ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; VI-NEXT: v_writelane_b32 v30, s38, 6 ; VI-NEXT: v_readfirstlane_b32 s45, v0 ; VI-NEXT: v_readfirstlane_b32 s44, v1 ; VI-NEXT: v_readfirstlane_b32 s43, v2 @@ -33853,7 +34239,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: v_readfirstlane_b32 s6, v14 ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_readfirstlane_b32 s7, v15 -; VI-NEXT: v_writelane_b32 v30, s39, 7 ; VI-NEXT: s_cbranch_scc0 .LBB45_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_lshr_b32 s46, s7, 16 @@ -34039,6 +34424,7 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: s_and_b32 s7, 0xffff, s7 ; VI-NEXT: s_lshl_b32 s44, s46, 16 ; VI-NEXT: s_or_b32 s7, s7, s44 +; VI-NEXT: v_readlane_b32 s30, v30, 6 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: v_mov_b32_e32 v2, s16 @@ -34069,14 +34455,13 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; VI-NEXT: v_mov_b32_e32 v27, s8 ; VI-NEXT: 
v_mov_b32_e32 v28, s6 ; VI-NEXT: v_mov_b32_e32 v29, s7 -; VI-NEXT: v_readlane_b32 s39, v30, 7 -; VI-NEXT: v_readlane_b32 s38, v30, 6 -; VI-NEXT: v_readlane_b32 s37, v30, 5 -; VI-NEXT: v_readlane_b32 s36, v30, 4 -; VI-NEXT: v_readlane_b32 s35, v30, 3 -; VI-NEXT: v_readlane_b32 s34, v30, 2 -; VI-NEXT: v_readlane_b32 s31, v30, 1 -; VI-NEXT: v_readlane_b32 s30, v30, 0 +; VI-NEXT: v_readlane_b32 s31, v30, 7 +; VI-NEXT: v_readlane_b32 s39, v30, 5 +; VI-NEXT: v_readlane_b32 s38, v30, 4 +; VI-NEXT: v_readlane_b32 s37, v30, 3 +; VI-NEXT: v_readlane_b32 s36, v30, 2 +; VI-NEXT: v_readlane_b32 s35, v30, 1 +; VI-NEXT: v_readlane_b32 s34, v30, 0 ; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; VI-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; VI-NEXT: s_mov_b64 exec, s[4:5] @@ -34121,10 +34506,11 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v30, s30, 0 -; GFX9-NEXT: v_writelane_b32 v30, s31, 1 +; GFX9-NEXT: v_writelane_b32 v30, s34, 0 +; GFX9-NEXT: v_writelane_b32 v30, s35, 1 +; GFX9-NEXT: v_writelane_b32 v30, s30, 2 +; GFX9-NEXT: v_writelane_b32 v30, s31, 3 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GFX9-NEXT: v_writelane_b32 v30, s34, 2 ; GFX9-NEXT: v_readfirstlane_b32 s6, v0 ; GFX9-NEXT: v_readfirstlane_b32 s7, v1 ; GFX9-NEXT: v_readfirstlane_b32 s8, v2 @@ -34142,7 +34528,6 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: v_readfirstlane_b32 s44, v14 ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_readfirstlane_b32 s45, v15 -; GFX9-NEXT: v_writelane_b32 v30, s35, 3 ; GFX9-NEXT: s_cbranch_scc0 .LBB45_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_lshr_b32 s46, s45, 16 @@ -34268,6 +34653,7 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; 
GFX9-NEXT: s_pack_ll_b32_b16 s41, s43, s56 ; GFX9-NEXT: s_pack_ll_b32_b16 s42, s44, s47 ; GFX9-NEXT: s_pack_ll_b32_b16 s43, s45, s46 +; GFX9-NEXT: v_readlane_b32 s30, v30, 2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 @@ -34298,10 +34684,9 @@ define inreg <60 x half> @bitcast_v15i64_to_v60f16_scalar(<15 x i64> inreg %a, i ; GFX9-NEXT: v_mov_b32_e32 v27, s41 ; GFX9-NEXT: v_mov_b32_e32 v28, s42 ; GFX9-NEXT: v_mov_b32_e32 v29, s43 -; GFX9-NEXT: v_readlane_b32 s35, v30, 3 -; GFX9-NEXT: v_readlane_b32 s34, v30, 2 -; GFX9-NEXT: v_readlane_b32 s31, v30, 1 -; GFX9-NEXT: v_readlane_b32 s30, v30, 0 +; GFX9-NEXT: v_readlane_b32 s31, v30, 3 +; GFX9-NEXT: v_readlane_b32 s35, v30, 1 +; GFX9-NEXT: v_readlane_b32 s34, v30, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -36999,6 +37384,22 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v15i64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 
offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -37030,22 +37431,6 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -37217,90 +37602,167 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; 
GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -37678,7 +38140,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v15f64_to_v60i16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -37692,6 +38153,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-NEXT: buffer_store_dword 
v58, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr56 ; SI-NEXT: ; implicit-def: $vgpr60 ; SI-NEXT: ; implicit-def: $vgpr46 @@ -37721,7 +38183,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr32 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(13) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc @@ -38019,7 +38481,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v15f64_to_v60i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -38032,6 +38493,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -38225,7 +38687,6 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15f64_to_v60i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -38238,6 +38699,7 @@ define <60 x i16> @bitcast_v15f64_to_v60i16(<15 x double> %a, i32 %b) { ; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -38601,22 +39063,6 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; SI-LABEL: bitcast_v15f64_to_v60i16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 -; SI-NEXT: v_mov_b32_e32 v27, s16 -; SI-NEXT: v_mov_b32_e32 v28, s17 -; SI-NEXT: v_mov_b32_e32 v29, s18 -; SI-NEXT: v_mov_b32_e32 v30, s19 -; SI-NEXT: v_mov_b32_e32 v25, s20 -; SI-NEXT: v_mov_b32_e32 v26, s21 -; SI-NEXT: v_mov_b32_e32 v23, s22 -; SI-NEXT: v_mov_b32_e32 v24, s23 -; SI-NEXT: v_mov_b32_e32 v21, s24 -; SI-NEXT: v_mov_b32_e32 v22, s25 -; SI-NEXT: v_mov_b32_e32 v19, s26 -; SI-NEXT: v_mov_b32_e32 v20, s27 -; SI-NEXT: s_and_b64 s[4:5], vcc, exec -; SI-NEXT: v_mov_b32_e32 v17, s28 -; SI-NEXT: v_mov_b32_e32 v18, s29 ; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill @@ -38633,6 +39079,22 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword 
v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v17 +; SI-NEXT: v_mov_b32_e32 v27, s16 +; SI-NEXT: v_mov_b32_e32 v28, s17 +; SI-NEXT: v_mov_b32_e32 v29, s18 +; SI-NEXT: v_mov_b32_e32 v30, s19 +; SI-NEXT: v_mov_b32_e32 v25, s20 +; SI-NEXT: v_mov_b32_e32 v26, s21 +; SI-NEXT: v_mov_b32_e32 v23, s22 +; SI-NEXT: v_mov_b32_e32 v24, s23 +; SI-NEXT: v_mov_b32_e32 v21, s24 +; SI-NEXT: v_mov_b32_e32 v22, s25 +; SI-NEXT: v_mov_b32_e32 v19, s26 +; SI-NEXT: v_mov_b32_e32 v20, s27 +; SI-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-NEXT: v_mov_b32_e32 v17, s28 +; SI-NEXT: v_mov_b32_e32 v18, s29 ; SI-NEXT: s_cbranch_scc0 .LBB49_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: v_lshr_b64 v[31:32], v[15:16], 16 @@ -38959,6 +39421,18 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; VI-LABEL: bitcast_v15f64_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, 
off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v17, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -38975,18 +39449,6 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v20, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB49_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -39193,6 +39655,18 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; GFX9-LABEL: bitcast_v15f64_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, 
off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v17, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -39209,18 +39683,6 @@ define inreg <60 x i16> @bitcast_v15f64_to_v60i16_scalar(<15 x double> inreg %a, ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: v_mov_b32_e32 v20, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 
; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB49_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -41880,6 +42342,22 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX9-LABEL: bitcast_v60i16_to_v15f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -41911,22 +42389,6 @@ define inreg <15 x double> 
@bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -42096,90 +42558,167 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: 
s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; 
GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -42557,6 +43096,22 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-LABEL: bitcast_v15f64_to_v60f16: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: 
buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; SI-NEXT: ; implicit-def: $vgpr38 ; SI-NEXT: ; kill: killed $vgpr38 @@ -42579,22 +43134,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; implicit-def: $vgpr38 ; SI-NEXT: ; kill: killed $vgpr38 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; SI-NEXT: ; implicit-def: $vgpr41 ; SI-NEXT: ; implicit-def: $vgpr57 ; 
SI-NEXT: ; implicit-def: $vgpr55 @@ -42627,7 +43166,7 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; SI-NEXT: ; kill: killed $vgpr38 ; SI-NEXT: ; implicit-def: $vgpr48 ; SI-NEXT: ; implicit-def: $vgpr38 -; SI-NEXT: s_waitcnt vmcnt(14) +; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 ; SI-NEXT: ; implicit-def: $vgpr31 ; SI-NEXT: ; kill: killed $vgpr31 @@ -43240,7 +43779,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; VI-LABEL: bitcast_v15f64_to_v60f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -43253,6 +43791,7 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; VI-NEXT: ; implicit-def: $vgpr59 ; VI-NEXT: ; implicit-def: $vgpr58 ; VI-NEXT: ; implicit-def: $vgpr57 @@ -43446,7 +43985,6 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; GFX9-LABEL: bitcast_v15f64_to_v60f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -43459,6 +43997,7 @@ define <60 x half> @bitcast_v15f64_to_v60f16(<15 x double> %a, i32 %b) { ; 
GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v30 ; GFX9-NEXT: ; implicit-def: $vgpr59 ; GFX9-NEXT: ; implicit-def: $vgpr58 ; GFX9-NEXT: ; implicit-def: $vgpr57 @@ -43822,6 +44361,22 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; SI-LABEL: bitcast_v15f64_to_v60f16_scalar: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, 
v17 ; SI-NEXT: v_readfirstlane_b32 s42, v1 ; SI-NEXT: v_readfirstlane_b32 s43, v2 @@ -43840,22 +44395,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; SI-NEXT: v_readfirstlane_b32 s4, v15 ; SI-NEXT: s_and_b64 s[44:45], vcc, exec ; SI-NEXT: v_readfirstlane_b32 s5, v16 -; SI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; SI-NEXT: s_cbranch_scc0 .LBB53_4 ; SI-NEXT: ; %bb.1: ; %cmp.false ; SI-NEXT: s_lshr_b32 s44, s5, 16 @@ -44378,6 +44917,18 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; VI-LABEL: bitcast_v15f64_to_v60f16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: v_mov_b32_e32 v17, s16 ; VI-NEXT: v_mov_b32_e32 v18, s17 @@ -44394,18 +44945,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; VI-NEXT: s_and_b64 s[4:5], vcc, exec ; VI-NEXT: v_mov_b32_e32 v19, s28 ; VI-NEXT: v_mov_b32_e32 v20, s29 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; 
VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB53_4 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -44612,6 +45151,18 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; GFX9-LABEL: bitcast_v15f64_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: v_mov_b32_e32 v17, s16 ; GFX9-NEXT: v_mov_b32_e32 v18, s17 @@ -44628,18 +45179,6 @@ define inreg <60 x half> @bitcast_v15f64_to_v60f16_scalar(<15 x double> inreg %a ; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX9-NEXT: v_mov_b32_e32 v19, s28 ; GFX9-NEXT: 
v_mov_b32_e32 v20, s29 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB53_4 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: v_lshrrev_b32_e32 v54, 16, v15 @@ -47691,6 +48230,22 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX9-LABEL: bitcast_v60f16_to_v15f64_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded 
Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v32, v15 ; GFX9-NEXT: v_mov_b32_e32 v33, v14 ; GFX9-NEXT: v_mov_b32_e32 v34, v13 @@ -47722,22 +48277,6 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX9-NEXT: s_lshr_b32 s8, s18, 16 ; GFX9-NEXT: s_lshr_b32 s7, s17, 16 ; GFX9-NEXT: s_lshr_b32 s6, s16, 16 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: 
buffer_store_dword v58, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_lshrrev_b32_e32 v40, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v41, 16, v33 ; GFX9-NEXT: v_lshrrev_b32_e32 v42, 16, v34 @@ -47909,90 +48448,167 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 ; GFX11-TRUE16-NEXT: s_clause 0x1f ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 +; GFX11-TRUE16-NEXT: ; 
meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 ; GFX11-TRUE16-NEXT: s_clause 0xf ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 +; GFX11-TRUE16-NEXT: ; meta 
instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 @@ -51092,6 +51708,18 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX9-LABEL: bitcast_v60i16_to_v60f16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; 
GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -51124,18 +51752,6 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; 
GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB57_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB57_4 @@ -53772,6 +54388,18 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; VI-LABEL: bitcast_v60f16_to_v60i16_scalar: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; VI-NEXT: s_lshr_b32 s6, s29, 16 ; VI-NEXT: s_lshr_b32 s7, s28, 16 @@ -53804,18 +54432,6 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v2 ; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v1 ; VI-NEXT: v_lshrrev_b32_e32 v30, 16, v0 -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded 
Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; VI-NEXT: s_cbranch_scc0 .LBB59_3 ; VI-NEXT: ; %bb.1: ; %cmp.false ; VI-NEXT: s_cbranch_execnz .LBB59_4 @@ -54008,6 +54624,18 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX9-LABEL: bitcast_v60f16_to_v60i16_scalar: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v56, off, 
s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GFX9-NEXT: s_lshr_b32 s43, s29, 16 ; GFX9-NEXT: s_lshr_b32 s42, s28, 16 @@ -54040,18 +54668,6 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_cbranch_scc0 .LBB59_3 ; GFX9-NEXT: ; %bb.1: ; %cmp.false ; GFX9-NEXT: s_cbranch_execnz .LBB59_4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 2889f37a65d97..7f6bb85827d31 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ 
b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -33,19 +33,21 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo ; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi -; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX11-NEXT: s_endpgm ; ; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 @@ -58,26 +60,27 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo ; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm ; ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; DAGISEL-GFX11-NEXT: 
s_mov_b32 s5, use@abs32@hi ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo -; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX11-NEXT: s_endpgm ; ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 @@ -90,7 +93,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr) @@ -102,7 +104,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24 @@ -123,6 +125,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25 ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: 
v_dual_mov_b32 v39, v15 @@ -162,6 +165,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10 @@ -170,7 +174,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15 -; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 @@ -230,7 +233,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24 @@ -251,6 +254,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25 ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 @@ -290,6 +294,7 @@ define amdgpu_cs_chain void 
@amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13 @@ -298,7 +303,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8 -; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 @@ -361,10 +365,10 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX11-LABEL: alloca_and_call: ; GISEL-GFX11: ; %bb.0: ; %.entry ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo ; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi -; GISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off ; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -373,6 +377,7 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX10-LABEL: alloca_and_call: ; GISEL-GFX10: ; %bb.0: ; %.entry ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo @@ -380,17 +385,16 @@ define amdgpu_cs_chain void @alloca_and_call() { ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 
0 -; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm ; ; DAGISEL-GFX11-LABEL: alloca_and_call: ; DAGISEL-GFX11: ; %bb.0: ; %.entry ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 ; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo -; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -399,6 +403,7 @@ define amdgpu_cs_chain void @alloca_and_call() { ; DAGISEL-GFX10-LABEL: alloca_and_call: ; DAGISEL-GFX10: ; %bb.0: ; %.entry ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi @@ -406,7 +411,6 @@ define amdgpu_cs_chain void @alloca_and_call() { ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0 -; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm .entry: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll index 36e2db0c4879d..a4882f1119e70 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll @@ -420,6 +420,7 @@ define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: s_clause 0x1 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 +; GISEL-GFX11-NEXT: ; meta instruction ; GISEL-GFX11-NEXT: 
scratch_store_b32 off, v16, off ; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 @@ -461,6 +462,7 @@ define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; DAGISEL-GFX11-NEXT: s_clause 0x1 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 +; DAGISEL-GFX11-NEXT: ; meta instruction ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 0329f23ea434f..954812c09d19a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -118,32 +118,32 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: 
v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 ; CHECK-NEXT: s_mov_b32 s50, s15 @@ -177,21 +177,21 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; 
CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -258,30 +258,30 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte 
Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v42, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v3 ; CHECK-NEXT: v_mov_b32_e32 v40, v2 @@ -313,20 +313,20 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: 
v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -400,32 +400,32 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v42, 
off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 ; CHECK-NEXT: s_mov_b32 s50, s15 @@ -459,21 +459,21 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; 
CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -542,30 +542,30 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v42, s16, 14 -; CHECK-NEXT: v_writelane_b32 v42, s30, 0 -; CHECK-NEXT: v_writelane_b32 v42, s31, 1 -; CHECK-NEXT: v_writelane_b32 v42, s34, 2 -; CHECK-NEXT: v_writelane_b32 v42, s35, 3 -; CHECK-NEXT: v_writelane_b32 v42, s36, 4 -; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s38, 6 -; CHECK-NEXT: v_writelane_b32 v42, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v42, s48, 8 -; CHECK-NEXT: v_writelane_b32 v42, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 
offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v42, s34, 0 +; CHECK-NEXT: v_writelane_b32 v42, s35, 1 +; CHECK-NEXT: v_writelane_b32 v42, s36, 2 +; CHECK-NEXT: v_writelane_b32 v42, s37, 3 +; CHECK-NEXT: v_writelane_b32 v42, s38, 4 +; CHECK-NEXT: v_writelane_b32 v42, s39, 5 +; CHECK-NEXT: v_writelane_b32 v42, s48, 6 +; CHECK-NEXT: v_writelane_b32 v42, s49, 7 +; CHECK-NEXT: v_writelane_b32 v42, s50, 8 +; CHECK-NEXT: v_writelane_b32 v42, s51, 9 +; CHECK-NEXT: v_writelane_b32 v42, s52, 10 +; CHECK-NEXT: v_writelane_b32 v42, s53, 11 +; CHECK-NEXT: v_writelane_b32 v42, s30, 12 +; CHECK-NEXT: v_writelane_b32 v42, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v42, s50, 10 -; CHECK-NEXT: v_writelane_b32 v42, s51, 11 -; CHECK-NEXT: v_writelane_b32 v42, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v42, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -596,20 +596,20 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v42, 13 -; CHECK-NEXT: v_readlane_b32 s52, v42, 12 -; CHECK-NEXT: v_readlane_b32 s51, v42, 11 -; CHECK-NEXT: v_readlane_b32 s50, v42, 10 -; CHECK-NEXT: v_readlane_b32 s49, 
v42, 9 -; CHECK-NEXT: v_readlane_b32 s48, v42, 8 -; CHECK-NEXT: v_readlane_b32 s39, v42, 7 -; CHECK-NEXT: v_readlane_b32 s38, v42, 6 -; CHECK-NEXT: v_readlane_b32 s37, v42, 5 -; CHECK-NEXT: v_readlane_b32 s36, v42, 4 -; CHECK-NEXT: v_readlane_b32 s35, v42, 3 -; CHECK-NEXT: v_readlane_b32 s34, v42, 2 -; CHECK-NEXT: v_readlane_b32 s31, v42, 1 -; CHECK-NEXT: v_readlane_b32 s30, v42, 0 +; CHECK-NEXT: v_readlane_b32 s30, v42, 12 +; CHECK-NEXT: v_readlane_b32 s31, v42, 13 +; CHECK-NEXT: v_readlane_b32 s53, v42, 11 +; CHECK-NEXT: v_readlane_b32 s52, v42, 10 +; CHECK-NEXT: v_readlane_b32 s51, v42, 9 +; CHECK-NEXT: v_readlane_b32 s50, v42, 8 +; CHECK-NEXT: v_readlane_b32 s49, v42, 7 +; CHECK-NEXT: v_readlane_b32 s48, v42, 6 +; CHECK-NEXT: v_readlane_b32 s39, v42, 5 +; CHECK-NEXT: v_readlane_b32 s38, v42, 4 +; CHECK-NEXT: v_readlane_b32 s37, v42, 3 +; CHECK-NEXT: v_readlane_b32 s36, v42, 2 +; CHECK-NEXT: v_readlane_b32 s35, v42, 1 +; CHECK-NEXT: v_readlane_b32 s34, v42, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v42, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -683,32 +683,32 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; 
CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: v_writelane_b32 v43, s34, 0 +; CHECK-NEXT: v_writelane_b32 v43, s35, 1 +; CHECK-NEXT: v_writelane_b32 v43, s36, 2 +; CHECK-NEXT: v_writelane_b32 v43, s37, 3 +; CHECK-NEXT: v_writelane_b32 v43, s38, 4 +; CHECK-NEXT: v_writelane_b32 v43, s39, 5 +; CHECK-NEXT: v_writelane_b32 v43, s48, 6 +; CHECK-NEXT: v_writelane_b32 v43, s49, 7 +; CHECK-NEXT: v_writelane_b32 v43, s50, 8 +; CHECK-NEXT: v_writelane_b32 v43, s51, 9 +; CHECK-NEXT: v_writelane_b32 v43, s52, 10 +; CHECK-NEXT: v_writelane_b32 v43, s53, 11 +; CHECK-NEXT: v_writelane_b32 v43, s30, 12 +; CHECK-NEXT: v_writelane_b32 v43, s31, 13 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v41, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -741,21 +741,21 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: v_readlane_b32 s30, v43, 12 ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 
-; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 +; CHECK-NEXT: v_readlane_b32 s31, v43, 13 +; CHECK-NEXT: v_readlane_b32 s53, v43, 11 +; CHECK-NEXT: v_readlane_b32 s52, v43, 10 +; CHECK-NEXT: v_readlane_b32 s51, v43, 9 +; CHECK-NEXT: v_readlane_b32 s50, v43, 8 +; CHECK-NEXT: v_readlane_b32 s49, v43, 7 +; CHECK-NEXT: v_readlane_b32 s48, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 5 +; CHECK-NEXT: v_readlane_b32 s38, v43, 4 +; CHECK-NEXT: v_readlane_b32 s37, v43, 3 +; CHECK-NEXT: v_readlane_b32 s36, v43, 2 +; CHECK-NEXT: v_readlane_b32 s35, v43, 1 +; CHECK-NEXT: v_readlane_b32 s34, v43, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v43, 14 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll index 583b6fe0a81ca..d4b07768e92a2 100644 --- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll +++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll @@ -205,17 +205,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v3, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v3, 
s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v3, s30, 0 -; GFX8-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX8-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-NEXT: v_readlane_b32 s30, v3, 0 +; GFX8-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -233,17 +233,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s3, s33, 8 ; GFX8-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s3 ; 4-byte Folded Spill ; GFX8-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1] +; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX8-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX8-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX8-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX8-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX8-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX8-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX8-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX8-ARCH-FLAT-NEXT: s_add_i32 s3, s33, 8 @@ -261,17 +261,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX9-NEXT: 
s_xor_saveexec_b64 s[16:17], -1 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[16:17] +; GFX9-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[16:17] ; GFX9-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX9-NEXT: v_writelane_b32 v3, s30, 0 -; GFX9-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -288,17 +288,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX9-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX9-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s33 ; 4-byte Folded Spill ; GFX9-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1] +; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX9-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX9-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX9-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX9-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-ARCH-FLAT-NEXT: v_readlane_b32 
s31, v3, 1 ; GFX9-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX9-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX9-ARCH-FLAT-NEXT: scratch_load_dword v3, off, s33 ; 4-byte Folded Reload @@ -315,17 +315,18 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX942-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-ARCH-FLAT-NEXT: scratch_store_dword off, v3, s33 ; 4-byte Folded Spill ; GFX942-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 ; GFX942-ARCH-FLAT-NEXT: s_add_i32 s32, s32, 16 +; GFX942-ARCH-FLAT-NEXT: s_nop 0 +; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX942-ARCH-FLAT-NEXT: s_getpc_b64 s[0:1] ; GFX942-ARCH-FLAT-NEXT: s_add_u32 s0, s0, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; GFX942-ARCH-FLAT-NEXT: s_addc_u32 s1, s1, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 ; GFX942-ARCH-FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s30, 0 -; GFX942-ARCH-FLAT-NEXT: v_writelane_b32 v3, s31, 1 ; GFX942-ARCH-FLAT-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-ARCH-FLAT-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0 +; GFX942-ARCH-FLAT-NEXT: v_readlane_b32 s31, v3, 1 ; GFX942-ARCH-FLAT-NEXT: s_mov_b32 s32, s33 ; GFX942-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-ARCH-FLAT-NEXT: scratch_load_dword v3, off, s33 ; 4-byte Folded Reload @@ -343,17 +344,17 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, with_private_to_flat_addrspacecast@gotpcrel32@lo+4 ; 
GFX10-NEXT: s_addc_u32 s17, s17, with_private_to_flat_addrspacecast@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX10-NEXT: v_readlane_b32 s31, v3, 1 ; GFX10-NEXT: v_readlane_b32 s30, v3, 0 +; GFX10-NEXT: v_readlane_b32 s31, v3, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir index dfe4b8a33f396..02856a31d2fb7 100644 --- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir +++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir @@ -21,6 +21,8 @@ body: | ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -31,6 +33,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, 
$vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -63,6 +67,8 @@ body: | ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec @@ -79,6 +85,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, 
$vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index a2ec87053a8d5..ceb271bd57233 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -27,21 +27,498 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr114 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr243 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr61 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr105 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr106 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr107 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr108 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr109 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr110 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr111 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr112 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr113 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr114 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr115 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr116 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr117 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr118 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr119 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr120 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr121 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr122 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr123 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr124 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr125 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr126 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr127 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr128 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr129 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr130 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr131 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr132 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr133 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr134 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr135 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr136 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr137 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr138 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr139 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr140 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr141 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr142 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr143 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr144 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr145 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr146 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr147 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr148 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr149 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr150 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr151 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr152 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr153 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr154 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr155 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr156 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr157 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr158 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr159 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr160 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr161 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr162 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr163 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr164 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr165 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr166 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr167 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr168 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr169 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr170 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr171 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr172 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr173 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr174 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr175 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr176 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr177 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr178 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr179 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr180 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr181 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr182 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr183 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr184 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr185 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr186 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr187 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr188 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr189 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr190 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr191 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr192 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr193 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr194 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr195 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr196 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr197 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr198 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr199 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr200 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr201 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr202 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr203 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr204 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr205 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr206 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr207 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr208 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr209 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr210 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr211 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr212 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr213 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr214 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr215 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr216 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr217 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr218 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr219 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr220 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr221 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr222 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr223 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr224 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr225 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr226 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr227 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr228 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr229 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr230 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr231 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr232 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr233 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr234 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr235 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr236 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr237 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr238 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr239 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr240 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr241 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr242 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr243 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr244 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr245 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr246 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr247 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr248 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr249 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr250 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr251 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr252 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr253 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr254 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr255 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 4352 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 
0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 2816 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 2560 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 2304 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2048 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 1792 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 1536 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1280 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1024 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 
implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 768 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 512 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 256 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 0 ; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec ; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 711d57baac15f..35b9d9d4996da 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -4392,8 +4392,8 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GCN-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v2, 1 ; GCN-NEXT: v_readlane_b32 s30, v2, 0 +; GCN-NEXT: v_readlane_b32 s31, v2, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4410,21 +4410,21 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: 
v_writelane_b32 v2, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v2, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v2, s30, 0 -; GFX7-NEXT: v_writelane_b32 v2, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_readlane_b32 s30, v2, 0 ; GFX7-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v2, 1 -; GFX7-NEXT: v_readlane_b32 s30, v2, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4441,19 +4441,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v2, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v2, s30, 0 -; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v2, 1 -; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte 
Folded Reload @@ -4470,19 +4470,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v2, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v2, s30, 0 -; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v2, 1 -; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4499,19 +4499,20 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v4, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v4, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v4, s30, 0 -; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: scratch_store_short v1, 
v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v4, 1 -; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v4, off, s33 ; 4-byte Folded Reload @@ -4529,19 +4530,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: buffer_store_short v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4559,19 +4560,19 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; 
GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: scratch_store_b16 v1, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -4590,18 +4591,18 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v4, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v4, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: scratch_store_b16 v1, v0, off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v4, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -4645,8 +4646,8 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v2, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v4, 1 ; GCN-NEXT: v_readlane_b32 s30, v4, 0 +; GCN-NEXT: v_readlane_b32 s31, v4, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 
; 4-byte Folded Reload @@ -4663,26 +4664,26 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v4, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v4, s30, 0 -; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_add_i32_e32 v3, vcc, 2, v2 +; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v4, 1 -; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4699,19 +4700,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v2, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: 
s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v2, s30, 0 -; GFX8-NEXT: v_writelane_b32 v2, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v2, 1 -; GFX8-NEXT: v_readlane_b32 s30, v2, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4728,19 +4729,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v2, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v2, s30, 0 -; GFX900-NEXT: v_writelane_b32 v2, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v2, 1 -; GFX900-NEXT: v_readlane_b32 s30, v2, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4757,19 +4758,20 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v4, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: 
v_writelane_b32 v4, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v4, s30, 0 -; GFX950-NEXT: v_writelane_b32 v4, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: scratch_store_dword v1, v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v4, 1 -; GFX950-NEXT: v_readlane_b32 s30, v4, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v4, off, s33 ; 4-byte Folded Reload @@ -4787,19 +4789,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], 
s33 ; 4-byte Folded Reload @@ -4817,19 +4819,19 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v2, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: scratch_store_b32 v1, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -4848,18 +4850,18 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v4, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v4, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: scratch_store_b32 v1, v0, off scope:SCOPE_SYS ; 
GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v4, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v4, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -4905,8 +4907,8 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, v3, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v5, 1 ; GCN-NEXT: v_readlane_b32 s30, v5, 0 +; GCN-NEXT: v_readlane_b32 s31, v5, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4923,13 +4925,13 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v4, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v4, s30, 0 -; GFX7-NEXT: v_writelane_b32 v4, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 @@ -4939,12 +4941,12 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v2 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 4, v3 +; GFX7-NEXT: v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_dword v0, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v4, 1 -; GFX7-NEXT: 
v_readlane_b32 s30, v4, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4961,22 +4963,22 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v4, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v4, s30, 0 -; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v2 +; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: buffer_store_short v1, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v4, 1 -; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -4993,21 +4995,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v3, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; 
GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v3, s30, 0 -; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v3, 1 -; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5024,22 +5026,23 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: v_mov_b32_e32 v4, v2 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_short v4, v1, off offset:4 sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: scratch_store_dword v4, v0, off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5057,21 
+5060,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v3, 1 -; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5089,21 +5092,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v3, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: 
v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: scratch_store_b16 v2, v1, off offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 v2, v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v3, 1 -; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v3, off, s33 ; 4-byte Folded Reload @@ -5122,21 +5125,21 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 -; GFX1250-NEXT: v_mov_b32_e32 v4, v2 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: v_mov_b32_e32 v4, v2 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b16 v4, v1, off offset:4 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: scratch_store_b32 v4, v0, off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5190,8 +5193,8 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v4, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v8, 1 ; GCN-NEXT: v_readlane_b32 
s30, v8, 0 +; GCN-NEXT: v_readlane_b32 s31, v8, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5208,13 +5211,13 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v6, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v6, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v6, s30, 0 -; GFX7-NEXT: v_writelane_b32 v6, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3 @@ -5231,13 +5234,13 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v4 +; GFX7-NEXT: v_readlane_b32 s30, v6, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v4, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v6, 1 -; GFX7-NEXT: v_readlane_b32 s30, v6, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5254,22 +5257,22 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 
v4, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v4, s30, 0 -; GFX8-NEXT: v_writelane_b32 v4, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v2 +; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: buffer_store_dword v1, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v4, 1 -; GFX8-NEXT: v_readlane_b32 s30, v4, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5286,21 +5289,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v3, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v3, s30, 0 -; GFX900-NEXT: v_writelane_b32 v3, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; 
GFX900-NEXT: v_readlane_b32 s31, v3, 1 -; GFX900-NEXT: v_readlane_b32 s30, v3, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5317,20 +5320,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: v_mov_b32_e32 v4, v2 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5348,21 +5352,21 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: 
v_writelane_b32 v3, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v3, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v3, 1 -; GFX10-NEXT: v_readlane_b32 s30, v3, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5380,19 +5384,19 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v3, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v3, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v3, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: scratch_store_b64 v2, v[0:1], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v3, 1 -; GFX11-NEXT: v_readlane_b32 s30, v3, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v3, off, s33 ; 4-byte Folded Reload @@ -5411,19 +5415,19 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 
exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 -; GFX1250-NEXT: v_mov_b32_e32 v4, v2 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: v_mov_b32_e32 v4, v2 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5497,8 +5501,8 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v8, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v16, 1 ; GCN-NEXT: v_readlane_b32 s30, v16, 0 +; GCN-NEXT: v_readlane_b32 s31, v16, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5515,13 +5519,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v10, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v10, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; 
GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v10, s30, 0 -; GFX7-NEXT: v_writelane_b32 v10, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7 @@ -5558,13 +5562,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v8 +; GFX7-NEXT: v_readlane_b32 s30, v10, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v8, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v10, 1 -; GFX7-NEXT: v_readlane_b32 s30, v10, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5581,13 +5585,13 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v6, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v6, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v6, s30, 0 -; GFX8-NEXT: v_writelane_b32 v6, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 12, v4 @@ -5597,12 +5601,12 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 4, 
v4 +; GFX8-NEXT: v_readlane_b32 s30, v6, 0 ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v6, 1 -; GFX8-NEXT: v_readlane_b32 s30, v6, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5619,15 +5623,16 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v5, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v5, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v5, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v5, s30, 0 -; GFX900-NEXT: v_writelane_b32 v5, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v5, 0 ; GFX900-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:12 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:8 @@ -5637,7 +5642,6 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v5, 1 -; GFX900-NEXT: v_readlane_b32 s30, v5, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5654,19 +5658,20 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: 
s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v5, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v5, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v5, s30, 0 -; GFX950-NEXT: v_writelane_b32 v5, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: scratch_store_dwordx4 v4, v[0:3], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v5, 1 -; GFX950-NEXT: v_readlane_b32 s30, v5, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v5, off, s33 ; 4-byte Folded Reload @@ -5684,15 +5689,16 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v5, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v5, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v5, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v5, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v5, 0 ; GFX10-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen offset:12 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword 
v2, v4, s[0:3], 0 offen offset:8 @@ -5702,7 +5708,6 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v5, 1 -; GFX10-NEXT: v_readlane_b32 s30, v5, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5720,19 +5725,19 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v5, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v5, 0 ; GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v5, 1 -; GFX11-NEXT: v_readlane_b32 s30, v5, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v5, off, s33 ; 4-byte Folded Reload @@ -5751,18 +5756,18 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v5, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; 
GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: v_writelane_b32 v5, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: scratch_store_b128 v4, v[0:3], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v5, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v5, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -5876,8 +5881,8 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_short v0, v16, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v20, 1 ; GCN-NEXT: v_readlane_b32 s30, v20, 0 +; GCN-NEXT: v_readlane_b32 s31, v20, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 ; 4-byte Folded Reload @@ -5894,13 +5899,13 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX7-NEXT: buffer_store_dword v18, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[16:17] +; GFX7-NEXT: v_writelane_b32 v18, s30, 0 ; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v18, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[16:17] ; GFX7-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX7-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX7-NEXT: v_writelane_b32 v18, s30, 0 -; GFX7-NEXT: v_writelane_b32 v18, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15 @@ -5977,13 +5982,13 @@ define void 
@test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX7-NEXT: buffer_store_short v2, v3, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 2, v16 +; GFX7-NEXT: v_readlane_b32 s30, v18, 0 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: buffer_store_short v1, v2, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_store_short v0, v16, s[0:3], 0 offen ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readlane_b32 s31, v18, 1 -; GFX7-NEXT: v_readlane_b32 s30, v18, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: buffer_load_dword v18, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6000,13 +6005,13 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX8-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[16:17] +; GFX8-NEXT: v_writelane_b32 v10, s30, 0 ; GFX8-NEXT: s_addk_i32 s32, 0x400 +; GFX8-NEXT: v_writelane_b32 v10, s31, 1 ; GFX8-NEXT: s_getpc_b64 s[16:17] ; GFX8-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX8-NEXT: v_writelane_b32 v10, s30, 0 -; GFX8-NEXT: v_writelane_b32 v10, s31, 1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX8-NEXT: v_add_u32_e32 v9, vcc, 28, v8 @@ -6028,12 +6033,12 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 4, v8 +; GFX8-NEXT: v_readlane_b32 s30, v10, 0 ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, v8, s[0:3], 0 offen ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readlane_b32 s31, v10, 1 -; GFX8-NEXT: 
v_readlane_b32 s30, v10, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6050,15 +6055,16 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; GFX900-NEXT: buffer_store_dword v9, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[16:17] +; GFX900-NEXT: v_writelane_b32 v9, s30, 0 ; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v9, s31, 1 ; GFX900-NEXT: s_getpc_b64 s[16:17] ; GFX900-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX900-NEXT: v_writelane_b32 v9, s30, 0 -; GFX900-NEXT: v_writelane_b32 v9, s31, 1 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v9, 0 ; GFX900-NEXT: buffer_store_dword v7, v8, s[0:3], 0 offen offset:28 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: buffer_store_dword v6, v8, s[0:3], 0 offen offset:24 @@ -6076,7 +6082,6 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX900-NEXT: buffer_store_dword v0, v8, s[0:3], 0 offen ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_readlane_b32 s31, v9, 1 -; GFX900-NEXT: v_readlane_b32 s30, v9, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v9, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6093,21 +6098,22 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_store_dword off, v9, s33 ; 4-byte Folded Spill ; GFX950-NEXT: s_mov_b64 exec, s[0:1] +; GFX950-NEXT: v_writelane_b32 v9, s30, 0 ; GFX950-NEXT: s_add_i32 s32, s32, 16 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_writelane_b32 
v9, s31, 1 ; GFX950-NEXT: s_getpc_b64 s[0:1] ; GFX950-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX950-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 ; GFX950-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX950-NEXT: v_writelane_b32 v9, s30, 0 -; GFX950-NEXT: v_writelane_b32 v9, s31, 1 ; GFX950-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX950-NEXT: v_readlane_b32 s30, v9, 0 ; GFX950-NEXT: scratch_store_dwordx4 v8, v[4:7], off offset:16 sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: scratch_store_dwordx4 v8, v[0:3], off sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_readlane_b32 s31, v9, 1 -; GFX950-NEXT: v_readlane_b32 s30, v9, 0 ; GFX950-NEXT: s_mov_b32 s32, s33 ; GFX950-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX950-NEXT: scratch_load_dword v9, off, s33 ; 4-byte Folded Reload @@ -6125,15 +6131,16 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v9, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s16 +; GFX10-NEXT: v_writelane_b32 v9, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v9, s31, 1 ; GFX10-NEXT: s_getpc_b64 s[16:17] ; GFX10-NEXT: s_add_u32 s16, s16, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s17, s17, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v9, s30, 0 ; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX10-NEXT: v_writelane_b32 v9, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX10-NEXT: v_readlane_b32 s30, v9, 0 ; GFX10-NEXT: buffer_store_dword v7, v8, s[0:3], 0 offen offset:28 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: buffer_store_dword v6, v8, s[0:3], 0 offen offset:24 @@ -6151,7 +6158,6 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX10-NEXT: buffer_store_dword v0, 
v8, s[0:3], 0 offen ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_readlane_b32 s31, v9, 1 -; GFX10-NEXT: v_readlane_b32 s30, v9, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 ; 4-byte Folded Reload @@ -6169,21 +6175,21 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v9, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v9, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v9, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, test_arg_store_v2bf16@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, test_arg_store_v2bf16@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v9, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v9, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v9, 0 ; GFX11-NEXT: scratch_store_b128 v8, v[4:7], off offset:16 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b128 v8, v[0:3], off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_readlane_b32 s31, v9, 1 -; GFX11-NEXT: v_readlane_b32 s30, v9, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v9, off, s33 ; 4-byte Folded Reload @@ -6202,20 +6208,20 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) { ; GFX1250-NEXT: scratch_store_b32 off, v9, s33 ; 4-byte Folded Spill ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_mov_b32 exec_lo, s0 -; GFX1250-NEXT: s_get_pc_i64 s[0:1] -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 ; GFX1250-NEXT: v_writelane_b32 v9, s30, 0 -; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_add_co_i32 s32, s32, 16 ; GFX1250-NEXT: 
v_writelane_b32 v9, s31, 1 +; GFX1250-NEXT: s_get_pc_i64 s[0:1] +; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[0:1], test_arg_store_v2bf16@gotpcrel+4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-NEXT: v_readlane_b32 s30, v9, 0 ; GFX1250-NEXT: scratch_store_b128 v8, v[4:7], off offset:16 scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: scratch_store_b128 v8, v[0:3], off scope:SCOPE_SYS ; GFX1250-NEXT: s_wait_storecnt 0x0 ; GFX1250-NEXT: v_readlane_b32 s31, v9, 1 -; GFX1250-NEXT: v_readlane_b32 s30, v9, 0 ; GFX1250-NEXT: s_mov_b32 s32, s33 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: s_xor_saveexec_b32 s0, -1 @@ -9518,6 +9524,17 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX8-LABEL: global_extload_v32bf16_to_v32f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 2, v1 ; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v2, vcc ; GFX8-NEXT: 
v_add_u32_e32 v5, vcc, 4, v1 @@ -9552,17 +9569,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_addc_u32_e32 v34, vcc, 0, v2, vcc ; GFX8-NEXT: v_add_u32_e32 v35, vcc, 36, v1 ; GFX8-NEXT: v_addc_u32_e32 v36, vcc, 0, v2, vcc -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v58, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX8-NEXT: v_add_u32_e32 v37, vcc, 38, v1 ; GFX8-NEXT: flat_load_ushort v44, v[1:2] ; GFX8-NEXT: v_addc_u32_e32 v38, vcc, 0, v2, vcc @@ -10021,16 +10027,21 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-LABEL: global_extload_v32bf16_to_v32f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v3, v2 -; GFX950-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; 
Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-NEXT: v_mov_b32_e32 v2, v1 ; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:2 ; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:12 ; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:8 @@ -10063,11 +10074,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:48 ; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:54 ; GFX950-NEXT: global_load_ushort v58, v[2:3], off offset:58 -; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(31) ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX950-NEXT: s_waitcnt vmcnt(30) @@ -14251,12 +14257,12 @@ define <32 x bfloat> @v_fadd_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_fadd_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: 
v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -19959,12 +19965,12 @@ define <32 x bfloat> @v_fmul_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_fmul_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -25150,12 +25156,12 @@ define <32 x bfloat> @v_minnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_minnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -29726,12 +29732,12 @@ define <32 x bfloat> @v_maxnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) { ; GFX950-LABEL: v_maxnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v24 ; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v8 ; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v24 ; GFX950-NEXT: 
v_lshlrev_b32_e32 v8, 16, v8 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v15 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v23 ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v7 @@ -48801,6 +48807,14 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v34, s34, 0 +; GFX8-NEXT: v_writelane_b32 v34, s35, 1 +; GFX8-NEXT: v_writelane_b32 v34, s36, 2 +; GFX8-NEXT: v_writelane_b32 v34, s37, 3 +; GFX8-NEXT: v_writelane_b32 v34, s38, 4 +; GFX8-NEXT: v_writelane_b32 v34, s39, 5 +; GFX8-NEXT: v_writelane_b32 v34, s30, 6 +; GFX8-NEXT: v_writelane_b32 v34, s31, 7 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v1 @@ -48852,26 +48866,18 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_and_b32_e32 v0, 1, v24 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v25 -; GFX8-NEXT: v_writelane_b32 v34, s30, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v26 -; GFX8-NEXT: v_writelane_b32 v34, s31, 1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v27 -; GFX8-NEXT: v_writelane_b32 v34, s34, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v28 -; GFX8-NEXT: v_writelane_b32 v34, s35, 3 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v29 -; GFX8-NEXT: v_writelane_b32 v34, s36, 4 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v30 -; GFX8-NEXT: v_writelane_b32 v34, s37, 5 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 ; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX8-NEXT: v_writelane_b32 v34, s38, 
6 -; GFX8-NEXT: v_writelane_b32 v34, s39, 7 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 @@ -48997,6 +49003,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v28 ; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v26 ; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v24 +; GFX8-NEXT: v_readlane_b32 s30, v34, 6 ; GFX8-NEXT: v_or_b32_sdwa v8, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v9, v18, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v10, v20, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -49005,14 +49012,13 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_or_b32_sdwa v13, v29, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v14, v27, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v15, v25, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_readlane_b32 s39, v34, 7 -; GFX8-NEXT: v_readlane_b32 s38, v34, 6 -; GFX8-NEXT: v_readlane_b32 s37, v34, 5 -; GFX8-NEXT: v_readlane_b32 s36, v34, 4 -; GFX8-NEXT: v_readlane_b32 s35, v34, 3 -; GFX8-NEXT: v_readlane_b32 s34, v34, 2 -; GFX8-NEXT: v_readlane_b32 s31, v34, 1 -; GFX8-NEXT: v_readlane_b32 s30, v34, 0 +; GFX8-NEXT: v_readlane_b32 s31, v34, 7 +; GFX8-NEXT: v_readlane_b32 s39, v34, 5 +; GFX8-NEXT: v_readlane_b32 s38, v34, 4 +; GFX8-NEXT: v_readlane_b32 s37, v34, 3 +; GFX8-NEXT: v_readlane_b32 s36, v34, 2 +; GFX8-NEXT: v_readlane_b32 s35, v34, 1 +; GFX8-NEXT: v_readlane_b32 s34, v34, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] @@ -49025,6 +49031,10 @@ 
define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v33, s34, 0 +; GFX900-NEXT: v_writelane_b32 v33, s35, 1 +; GFX900-NEXT: v_writelane_b32 v33, s30, 2 +; GFX900-NEXT: v_writelane_b32 v33, s31, 3 ; GFX900-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX900-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 ; GFX900-NEXT: v_and_b32_e32 v0, 1, v3 @@ -49084,11 +49094,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_and_b32_e32 v0, 1, v28 ; GFX900-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0 ; GFX900-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX900-NEXT: v_writelane_b32 v33, s30, 0 -; GFX900-NEXT: v_writelane_b32 v33, s31, 1 -; GFX900-NEXT: v_writelane_b32 v33, s34, 2 ; GFX900-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX900-NEXT: v_writelane_b32 v33, s35, 3 ; GFX900-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_and_b32_e32 v0, 1, v0 @@ -49193,6 +49199,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX900-NEXT: s_mov_b32 s4, 0x5040100 +; GFX900-NEXT: v_readlane_b32 s30, v33, 2 ; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 ; GFX900-NEXT: v_perm_b32 v1, v2, v5, s4 ; GFX900-NEXT: v_perm_b32 v2, v4, v7, s4 @@ -49209,10 +49216,9 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX900-NEXT: v_perm_b32 v13, v26, v29, s4 ; GFX900-NEXT: v_perm_b32 v14, v28, v32, s4 ; GFX900-NEXT: v_perm_b32 v15, v31, v30, s4 -; GFX900-NEXT: v_readlane_b32 s35, v33, 3 -; GFX900-NEXT: v_readlane_b32 s34, v33, 2 -; GFX900-NEXT: v_readlane_b32 s31, v33, 1 -; GFX900-NEXT: v_readlane_b32 s30, v33, 0 +; GFX900-NEXT: v_readlane_b32 s31, v33, 3 +; 
GFX900-NEXT: v_readlane_b32 s35, v33, 1 +; GFX900-NEXT: v_readlane_b32 s34, v33, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -49228,6 +49234,12 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v31, off, s32 offset:60 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:124 ; GFX950-NEXT: scratch_load_ushort v33, off, s32 @@ -49252,17 +49264,11 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:40 ; GFX950-NEXT: v_and_b32_e32 v29, 1, v29 -; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v29 ; GFX950-NEXT: scratch_load_dword v29, off, s32 offset:84 ; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:20 ; GFX950-NEXT: v_and_b32_e32 v28, 1, v28 -; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v28 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v26, 1, v26 ; GFX950-NEXT: v_and_b32_e32 
v27, 1, v27 ; GFX950-NEXT: v_and_b32_e32 v24, 1, v24 @@ -54681,6 +54687,22 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-LABEL: v_fma_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:64 ; GFX950-NEXT: scratch_load_dword v36, off, s32 ; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:60 @@ -54698,14 +54720,6 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:16 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:20 ; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:24 -; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 
; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v43, 0xffff0000, v14 ; GFX950-NEXT: v_lshlrev_b32_e32 v45, 16, v14 ; GFX950-NEXT: v_and_b32_e32 v46, 0xffff0000, v29 @@ -54714,20 +54728,12 @@ define <32 x bfloat> @v_fma_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b, <32 x bf ; GFX950-NEXT: v_lshlrev_b32_e32 v61, 16, v12 ; GFX950-NEXT: v_and_b32_e32 v62, 0xffff0000, v27 ; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v30 ; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v30 ; GFX950-NEXT: v_and_b32_e32 v47, 0xffff0000, v13 ; GFX950-NEXT: v_lshlrev_b32_e32 v57, 16, v13 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v15 ; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v15 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v58, 0xffff0000, v28 ; GFX950-NEXT: v_lshlrev_b32_e32 v60, 16, v28 ; GFX950-NEXT: s_waitcnt vmcnt(16) diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index d89b39348ad9a..863177ae3d6b5 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -7,6 +7,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, 
$sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17, $sgpr12_sgpr13 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0 ; GFX90A-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr12, $sgpr17, implicit-def $scc ; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index ab2ad19d0f1bf..2f6f9e45cafbf 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -902,47 +902,47 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v0, s30, 0 -; CHECK-NEXT: v_writelane_b32 v0, s31, 1 -; CHECK-NEXT: v_writelane_b32 v0, s33, 2 -; CHECK-NEXT: v_writelane_b32 v0, s34, 3 -; CHECK-NEXT: v_writelane_b32 v0, s35, 4 -; CHECK-NEXT: v_writelane_b32 v0, s36, 5 -; CHECK-NEXT: v_writelane_b32 v0, s37, 6 -; CHECK-NEXT: v_writelane_b32 v0, s38, 7 -; CHECK-NEXT: v_writelane_b32 v0, s39, 8 -; CHECK-NEXT: v_writelane_b32 v0, s48, 9 -; CHECK-NEXT: v_writelane_b32 v0, s49, 10 -; CHECK-NEXT: v_writelane_b32 v0, s50, 11 -; CHECK-NEXT: v_writelane_b32 v0, s51, 12 -; CHECK-NEXT: v_writelane_b32 v0, s52, 13 -; CHECK-NEXT: v_writelane_b32 v0, s53, 14 -; CHECK-NEXT: v_writelane_b32 v0, s54, 15 -; CHECK-NEXT: v_writelane_b32 v0, s55, 16 -; CHECK-NEXT: v_writelane_b32 v0, s64, 17 -; CHECK-NEXT: v_writelane_b32 v0, s65, 18 -; CHECK-NEXT: v_writelane_b32 v0, s66, 19 -; CHECK-NEXT: v_writelane_b32 v0, s67, 20 -; CHECK-NEXT: v_writelane_b32 v0, s68, 21 -; CHECK-NEXT: v_writelane_b32 v0, s69, 22 -; CHECK-NEXT: v_writelane_b32 v0, s70, 23 -; CHECK-NEXT: v_writelane_b32 v0, s71, 24 
-; CHECK-NEXT: v_writelane_b32 v0, s80, 25 -; CHECK-NEXT: v_writelane_b32 v0, s81, 26 -; CHECK-NEXT: v_writelane_b32 v0, s82, 27 -; CHECK-NEXT: v_writelane_b32 v0, s83, 28 -; CHECK-NEXT: v_writelane_b32 v0, s84, 29 -; CHECK-NEXT: v_writelane_b32 v0, s85, 30 -; CHECK-NEXT: v_writelane_b32 v0, s86, 31 -; CHECK-NEXT: v_writelane_b32 v0, s87, 32 -; CHECK-NEXT: v_writelane_b32 v0, s96, 33 -; CHECK-NEXT: v_writelane_b32 v0, s97, 34 -; CHECK-NEXT: v_writelane_b32 v0, s98, 35 -; CHECK-NEXT: v_writelane_b32 v0, s99, 36 +; CHECK-NEXT: v_writelane_b32 v0, s33, 0 +; CHECK-NEXT: v_writelane_b32 v0, s34, 1 +; CHECK-NEXT: v_writelane_b32 v0, s35, 2 +; CHECK-NEXT: v_writelane_b32 v0, s36, 3 +; CHECK-NEXT: v_writelane_b32 v0, s37, 4 +; CHECK-NEXT: v_writelane_b32 v0, s38, 5 +; CHECK-NEXT: v_writelane_b32 v0, s39, 6 +; CHECK-NEXT: v_writelane_b32 v0, s48, 7 +; CHECK-NEXT: v_writelane_b32 v0, s49, 8 +; CHECK-NEXT: v_writelane_b32 v0, s50, 9 +; CHECK-NEXT: v_writelane_b32 v0, s51, 10 +; CHECK-NEXT: v_writelane_b32 v0, s52, 11 +; CHECK-NEXT: v_writelane_b32 v0, s53, 12 +; CHECK-NEXT: v_writelane_b32 v0, s54, 13 +; CHECK-NEXT: v_writelane_b32 v0, s55, 14 +; CHECK-NEXT: v_writelane_b32 v0, s64, 15 +; CHECK-NEXT: v_writelane_b32 v0, s65, 16 +; CHECK-NEXT: v_writelane_b32 v0, s66, 17 +; CHECK-NEXT: v_writelane_b32 v0, s67, 18 +; CHECK-NEXT: v_writelane_b32 v0, s68, 19 +; CHECK-NEXT: v_writelane_b32 v0, s69, 20 +; CHECK-NEXT: v_writelane_b32 v0, s70, 21 +; CHECK-NEXT: v_writelane_b32 v0, s71, 22 +; CHECK-NEXT: v_writelane_b32 v0, s80, 23 +; CHECK-NEXT: v_writelane_b32 v0, s81, 24 +; CHECK-NEXT: v_writelane_b32 v0, s82, 25 +; CHECK-NEXT: v_writelane_b32 v0, s83, 26 +; CHECK-NEXT: v_writelane_b32 v0, s84, 27 +; CHECK-NEXT: v_writelane_b32 v0, s85, 28 +; CHECK-NEXT: v_writelane_b32 v0, s86, 29 +; CHECK-NEXT: v_writelane_b32 v0, s87, 30 +; CHECK-NEXT: v_writelane_b32 v0, s96, 31 +; CHECK-NEXT: v_writelane_b32 v0, s97, 32 +; CHECK-NEXT: v_writelane_b32 v0, s98, 33 +; CHECK-NEXT: v_writelane_b32 
v0, s99, 34 +; CHECK-NEXT: v_writelane_b32 v0, s100, 35 +; CHECK-NEXT: v_writelane_b32 v0, s101, 36 +; CHECK-NEXT: v_writelane_b32 v0, s30, 37 +; CHECK-NEXT: v_writelane_b32 v0, s31, 38 ; CHECK-NEXT: s_mov_b32 s40, s12 -; CHECK-NEXT: v_writelane_b32 v0, s100, 37 ; CHECK-NEXT: s_cmp_eq_u32 s40, 0 -; CHECK-NEXT: v_writelane_b32 v0, s101, 38 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1380,6 +1380,7 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s31 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s30, v0, 37 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s32 ; CHECK-NEXT: ;;#ASMEND @@ -1596,45 +1597,44 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s101, v0, 38 -; CHECK-NEXT: v_readlane_b32 s100, v0, 37 -; CHECK-NEXT: v_readlane_b32 s99, v0, 36 -; CHECK-NEXT: v_readlane_b32 s98, v0, 35 -; CHECK-NEXT: v_readlane_b32 s97, v0, 34 -; CHECK-NEXT: v_readlane_b32 s96, v0, 33 -; CHECK-NEXT: v_readlane_b32 s87, v0, 32 -; CHECK-NEXT: v_readlane_b32 s86, v0, 31 -; CHECK-NEXT: v_readlane_b32 s85, v0, 30 -; CHECK-NEXT: v_readlane_b32 s84, v0, 29 -; CHECK-NEXT: v_readlane_b32 s83, v0, 28 -; CHECK-NEXT: v_readlane_b32 s82, v0, 27 -; CHECK-NEXT: v_readlane_b32 s81, v0, 26 -; CHECK-NEXT: v_readlane_b32 s80, v0, 25 -; CHECK-NEXT: v_readlane_b32 s71, v0, 24 -; CHECK-NEXT: v_readlane_b32 s70, v0, 23 -; CHECK-NEXT: v_readlane_b32 s69, v0, 22 -; CHECK-NEXT: v_readlane_b32 s68, v0, 21 -; CHECK-NEXT: v_readlane_b32 s67, v0, 20 -; CHECK-NEXT: v_readlane_b32 s66, v0, 19 -; CHECK-NEXT: v_readlane_b32 s65, v0, 18 -; CHECK-NEXT: v_readlane_b32 s64, v0, 17 -; CHECK-NEXT: v_readlane_b32 s55, v0, 16 -; CHECK-NEXT: v_readlane_b32 s54, v0, 15 -; CHECK-NEXT: v_readlane_b32 s53, v0, 14 -; CHECK-NEXT: v_readlane_b32 s52, v0, 13 -; CHECK-NEXT: v_readlane_b32 s51, v0, 12 -; 
CHECK-NEXT: v_readlane_b32 s50, v0, 11 -; CHECK-NEXT: v_readlane_b32 s49, v0, 10 -; CHECK-NEXT: v_readlane_b32 s48, v0, 9 -; CHECK-NEXT: v_readlane_b32 s39, v0, 8 -; CHECK-NEXT: v_readlane_b32 s38, v0, 7 -; CHECK-NEXT: v_readlane_b32 s37, v0, 6 -; CHECK-NEXT: v_readlane_b32 s36, v0, 5 -; CHECK-NEXT: v_readlane_b32 s35, v0, 4 -; CHECK-NEXT: v_readlane_b32 s34, v0, 3 -; CHECK-NEXT: v_readlane_b32 s33, v0, 2 -; CHECK-NEXT: v_readlane_b32 s31, v0, 1 -; CHECK-NEXT: v_readlane_b32 s30, v0, 0 +; CHECK-NEXT: v_readlane_b32 s31, v0, 38 +; CHECK-NEXT: v_readlane_b32 s101, v0, 36 +; CHECK-NEXT: v_readlane_b32 s100, v0, 35 +; CHECK-NEXT: v_readlane_b32 s99, v0, 34 +; CHECK-NEXT: v_readlane_b32 s98, v0, 33 +; CHECK-NEXT: v_readlane_b32 s97, v0, 32 +; CHECK-NEXT: v_readlane_b32 s96, v0, 31 +; CHECK-NEXT: v_readlane_b32 s87, v0, 30 +; CHECK-NEXT: v_readlane_b32 s86, v0, 29 +; CHECK-NEXT: v_readlane_b32 s85, v0, 28 +; CHECK-NEXT: v_readlane_b32 s84, v0, 27 +; CHECK-NEXT: v_readlane_b32 s83, v0, 26 +; CHECK-NEXT: v_readlane_b32 s82, v0, 25 +; CHECK-NEXT: v_readlane_b32 s81, v0, 24 +; CHECK-NEXT: v_readlane_b32 s80, v0, 23 +; CHECK-NEXT: v_readlane_b32 s71, v0, 22 +; CHECK-NEXT: v_readlane_b32 s70, v0, 21 +; CHECK-NEXT: v_readlane_b32 s69, v0, 20 +; CHECK-NEXT: v_readlane_b32 s68, v0, 19 +; CHECK-NEXT: v_readlane_b32 s67, v0, 18 +; CHECK-NEXT: v_readlane_b32 s66, v0, 17 +; CHECK-NEXT: v_readlane_b32 s65, v0, 16 +; CHECK-NEXT: v_readlane_b32 s64, v0, 15 +; CHECK-NEXT: v_readlane_b32 s55, v0, 14 +; CHECK-NEXT: v_readlane_b32 s54, v0, 13 +; CHECK-NEXT: v_readlane_b32 s53, v0, 12 +; CHECK-NEXT: v_readlane_b32 s52, v0, 11 +; CHECK-NEXT: v_readlane_b32 s51, v0, 10 +; CHECK-NEXT: v_readlane_b32 s50, v0, 9 +; CHECK-NEXT: v_readlane_b32 s49, v0, 8 +; CHECK-NEXT: v_readlane_b32 s48, v0, 7 +; CHECK-NEXT: v_readlane_b32 s39, v0, 6 +; CHECK-NEXT: v_readlane_b32 s38, v0, 5 +; CHECK-NEXT: v_readlane_b32 s37, v0, 4 +; CHECK-NEXT: v_readlane_b32 s36, v0, 3 +; CHECK-NEXT: v_readlane_b32 s35, v0, 2 +; 
CHECK-NEXT: v_readlane_b32 s34, v0, 1 +; CHECK-NEXT: v_readlane_b32 s33, v0, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir index 7336a54ae42db..72b6b9f9ec686 100644 --- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir @@ -19,11 +19,17 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $agpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 -1 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr62, 256 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62 diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index d1cede64ce71d..445250d4e77e4 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ 
b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -41,16 +41,16 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i8_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i8_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -69,16 +69,16 @@ define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -101,16 +101,16 
@@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -129,16 +129,16 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -161,16 +161,16 @@ define void 
@test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -189,16 +189,16 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -221,17 +221,17 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg 
%arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -250,16 +250,16 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -282,17 +282,17 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -311,16 +311,16 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -343,18 +343,18 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { ; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s19, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -373,16 +373,16 @@ define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s16 ; GFX11-NEXT: v_writelane_b32 v40, s3, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -405,8 +405,9 @@ define void 
@test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12 @@ -414,10 +415,9 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -436,16 +436,16 @@ define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s17 ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -468,8 +468,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[26:27] ; GFX9-NEXT: v_writelane_b32 v40, s24, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[24:25] ; GFX9-NEXT: s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12 @@ -481,10 +482,9 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX9-NEXT: s_mov_b32 s17, s21 ; GFX9-NEXT: s_mov_b32 s18, s22 ; GFX9-NEXT: s_mov_b32 s19, s23 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[24:25] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -503,16 +503,16 @@ define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s21 ; GFX11-NEXT: v_writelane_b32 v40, s20, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[20:21] ; GFX11-NEXT: s_add_u32 s20, s20, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s21, s21, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -535,16 +535,16 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -563,16 +563,16 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 
s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -595,16 +595,16 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -623,16 +623,16 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -655,16 +655,16 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -683,16 +683,16 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, 
v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -715,17 +715,17 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -744,16 +744,16 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -776,16 +776,16 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -804,16 +804,16 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; 
GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -837,16 +837,16 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -865,16 +865,16 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 
s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -897,17 +897,17 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -926,16 +926,16 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 
s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -958,17 +958,17 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -987,16 +987,16 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4f16_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4f16_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 
s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1019,17 +1019,17 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1048,16 +1048,16 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p0_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p0_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -1080,17 +1080,17 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1109,16 +1109,16 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -1141,16 +1141,16 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s17, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p3_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p3_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1169,16 +1169,16 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s2 ; GFX11-NEXT: v_writelane_b32 v40, s1, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p3_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p3_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1201,8 
+1201,9 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s20, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12 @@ -1210,10 +1211,9 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1232,16 +1232,16 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s17 ; GFX11-NEXT: v_writelane_b32 v40, s16, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, external_void_func_v2p1_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v2p1_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1264,17 +1264,17 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-NEXT: v_writelane_b32 v40, s18, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[18:19] ; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1293,16 +1293,16 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s3 ; GFX11-NEXT: v_writelane_b32 v40, s2, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2p5_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2p5_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1325,8 +1325,9 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[22:23] ; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 @@ -1335,10 +1336,9 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 ; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1357,16 +1357,16 @@ define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s18 ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1389,8 +1389,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[40:41] ; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12 @@ -1407,10 +1408,9 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_mov_b32 s22, s26 ; GFX9-NEXT: s_mov_b32 s23, s27 ; GFX9-NEXT: s_mov_b32 s24, s28 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1429,16 +1429,16 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s26 ; GFX11-NEXT: v_writelane_b32 v40, s25, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[26:27] ; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 
s27, s27, external_void_func_a15i32_inreg@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1463,8 +1463,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-NEXT: v_writelane_b32 v40, s21, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[22:23] ; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 @@ -1482,10 +1483,9 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX9-NEXT: s_mov_b32 s11, s18 ; GFX9-NEXT: s_mov_b32 s15, s19 ; GFX9-NEXT: s_mov_b32 s16, s20 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1504,16 +1504,16 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s18 ; GFX11-NEXT: v_writelane_b32 v40, s17, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: 
s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[18:19] ; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 8e12e7e03947b..4e0b16792aad4 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -5852,7 +5852,10 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 11 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 12 @@ -5860,10 +5863,8 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; VI-NEXT: v_mov_b32_e32 v0, 14 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 15 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, 
external_void_func_12xv3i32@rel32@lo+4 @@ -5899,10 +5900,9 @@ define void @stack_12xv3i32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 9 ; VI-NEXT: v_mov_b32_e32 v29, 9 ; VI-NEXT: v_mov_b32_e32 v30, 10 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -5920,7 +5920,10 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 11 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 12 @@ -5928,10 +5931,8 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 14 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 15 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -5967,10 +5968,9 @@ define void @stack_12xv3i32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 9 ; CI-NEXT: v_mov_b32_e32 v29, 9 ; CI-NEXT: v_mov_b32_e32 v30, 10 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -5988,7 +5988,10 @@ define void @stack_12xv3i32() #0 
{ ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 11 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 @@ -5996,10 +5999,8 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -6035,10 +6036,9 @@ define void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 9 ; GFX9-NEXT: v_mov_b32_e32 v29, 9 ; GFX9-NEXT: v_mov_b32_e32 v30, 10 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6057,11 +6057,12 @@ define void @stack_12xv3i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12 ; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14 ; GFX11-NEXT: v_mov_b32_e32 v4, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 @@ -6084,11 +6085,10 @@ define void @stack_12xv3i32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6106,7 +6106,10 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 11 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 12 @@ -6114,10 +6117,8 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 14 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 15 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 @@ -6153,10 +6154,9 @@ define void @stack_12xv3i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 9 ; HSA-NEXT: v_mov_b32_e32 v29, 9 ; HSA-NEXT: v_mov_b32_e32 v30, 10 -; HSA-NEXT: 
v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6191,7 +6191,10 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6199,10 +6202,8 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6238,10 +6239,9 @@ define void @stack_12xv3f32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; VI-NEXT: v_mov_b32_e32 v30, 0x41200000 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6259,7 +6259,10 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: 
s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6267,10 +6270,8 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6306,10 +6307,9 @@ define void @stack_12xv3f32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v29, 0x41100000 ; CI-NEXT: v_mov_b32_e32 v30, 0x41200000 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6327,7 +6327,10 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6335,10 +6338,8 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 
; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6374,10 +6375,9 @@ define void @stack_12xv3f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6396,13 +6396,14 @@ define void @stack_12xv3f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v4, s0 @@ -6427,11 +6428,10 @@ define void @stack_12xv3f32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6449,7 +6449,10 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000 @@ -6457,10 +6460,8 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 @@ -6496,10 +6497,9 @@ define void @stack_12xv3f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 0x41100000 ; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000 ; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6534,7 +6534,10 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: 
buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 7 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 8 @@ -6550,10 +6553,8 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 13 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; VI-NEXT: v_mov_b32_e32 v0, 14 -; VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 15 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6589,10 +6590,9 @@ define void @stack_8xv5i32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 5 ; VI-NEXT: v_mov_b32_e32 v29, 5 ; VI-NEXT: v_mov_b32_e32 v30, 6 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6610,7 +6610,10 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 7 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 @@ -6626,10 +6629,8 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 13 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 14 -; CI-NEXT: 
v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 15 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6665,10 +6666,9 @@ define void @stack_8xv5i32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 5 ; CI-NEXT: v_mov_b32_e32 v29, 5 ; CI-NEXT: v_mov_b32_e32 v30, 6 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6686,7 +6686,10 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 7 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 @@ -6702,10 +6705,8 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6741,10 +6742,9 @@ define void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 5 ; GFX9-NEXT: v_mov_b32_e32 v29, 5 ; GFX9-NEXT: v_mov_b32_e32 v30, 6 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6763,15 +6763,16 @@ define void @stack_8xv5i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8 ; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10 ; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14 ; GFX11-NEXT: v_mov_b32_e32 v6, 13 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 @@ -6795,11 +6796,10 @@ define void @stack_8xv5i32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6817,7 +6817,10 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 7 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 8 @@ -6833,10 +6836,8 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 13 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 14 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 15 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 @@ -6872,10 +6873,9 @@ define void @stack_8xv5i32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 5 ; HSA-NEXT: v_mov_b32_e32 v29, 5 ; HSA-NEXT: v_mov_b32_e32 v30, 6 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6906,7 +6906,10 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; VI-NEXT: s_mov_b64 exec, s[8:9] +; VI-NEXT: v_writelane_b32 v40, s4, 2 +; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: s_addk_i32 s32, 0x400 +; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; VI-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -6922,10 +6925,8 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; 
VI-NEXT: v_writelane_b32 v40, s4, 2 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; VI-NEXT: v_writelane_b32 v40, s30, 0 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -6961,10 +6962,9 @@ define void @stack_8xv5f32() #0 { ; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; VI-NEXT: v_writelane_b32 v40, s31, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: v_readlane_b32 s30, v40, 0 +; VI-NEXT: v_readlane_b32 s31, v40, 1 ; VI-NEXT: s_mov_b32 s32, s33 ; VI-NEXT: v_readlane_b32 s4, v40, 2 ; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -6982,7 +6982,10 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 ; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CI-NEXT: s_mov_b64 exec, s[8:9] +; CI-NEXT: v_writelane_b32 v40, s4, 2 +; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: s_addk_i32 s32, 0x400 +; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CI-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -6998,10 +7001,8 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 -; CI-NEXT: v_writelane_b32 v40, s4, 2 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 -; CI-NEXT: v_writelane_b32 v40, s30, 0 ; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7037,10 +7038,9 @@ define void @stack_8xv5f32() #0 { ; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; CI-NEXT: 
v_mov_b32_e32 v30, 0x40c00000 -; CI-NEXT: v_writelane_b32 v40, s31, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: v_readlane_b32 s30, v40, 0 +; CI-NEXT: v_readlane_b32 s31, v40, 1 ; CI-NEXT: s_mov_b32 s32, s33 ; CI-NEXT: v_readlane_b32 s4, v40, 2 ; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -7058,7 +7058,10 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: v_writelane_b32 v40, s4, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -7074,10 +7077,8 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s4, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7113,10 +7114,9 @@ define void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -7135,19 +7135,20 @@ define void @stack_8xv5f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, 
v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000 ; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000 ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000 ; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b32 off, v8, s0 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 @@ -7170,11 +7171,10 @@ define void @stack_8xv5f32() #0 { ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7192,7 +7192,10 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 ; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HSA-NEXT: s_mov_b64 exec, s[8:9] +; HSA-NEXT: v_writelane_b32 v40, s4, 2 +; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: s_addk_i32 s32, 0x400 +; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000 
; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000 @@ -7208,10 +7211,8 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 -; HSA-NEXT: v_writelane_b32 v40, s4, 2 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 -; HSA-NEXT: v_writelane_b32 v40, s30, 0 ; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; HSA-NEXT: s_getpc_b64 s[4:5] ; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 @@ -7247,10 +7248,9 @@ define void @stack_8xv5f32() #0 { ; HSA-NEXT: v_mov_b32_e32 v28, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000 -; HSA-NEXT: v_writelane_b32 v40, s31, 1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] -; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: v_readlane_b32 s30, v40, 0 +; HSA-NEXT: v_readlane_b32 s31, v40, 1 ; HSA-NEXT: s_mov_b32 s32, s33 ; HSA-NEXT: v_readlane_b32 s4, v40, 2 ; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 4df10497bcd27..cdec3b6751e3a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -20,8 +20,8 @@ define void @use_vcc() #1 { ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s31, v40, 1 ; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s31, v40, 1 ; GCN: v_readlane_b32 s4, v40, 2 ; GCN: s_mov_b32 s33, s4 ; GCN: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index 61a195f9c314f..8c0991fd32849 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ 
b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -36,11 +36,11 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s35, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s35, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 3 ; MUBUF-NEXT: s_getpc_b64 s[34:35] ; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 @@ -48,10 +48,10 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] -; MUBUF-NEXT: v_readlane_b32 s35, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s35, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 4 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -70,11 +70,11 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3 +; 
FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s35, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 3 ; FLATSCR-NEXT: s_getpc_b64 s[34:35] ; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 @@ -82,10 +82,10 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] -; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s35, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -110,20 +110,20 @@ define void @test_func_call_external_void_funcx2() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s35, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s35, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 3 ; MUBUF-NEXT: s_getpc_b64 s[34:35] ; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35] -; MUBUF-NEXT: v_readlane_b32 s35, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 
-; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s35, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 4 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -142,20 +142,20 @@ define void @test_func_call_external_void_funcx2() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s35, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 3 ; FLATSCR-NEXT: s_getpc_b64 s[34:35] ; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35] -; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s35, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -181,8 +181,8 @@ define void @void_func_void_clobber_s30_s31() #2 { ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s31, v0, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v0, 0 +; MUBUF-NEXT: 
v_readlane_b32 s31, v0, 1 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] @@ -200,8 +200,8 @@ define void @void_func_void_clobber_s30_s31() #2 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v0, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] @@ -523,23 +523,23 @@ define void @callee_saved_sgpr_func() #2 { ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 3 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s30, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 2 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: s_mov_b32 s34, s40 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] +; MUBUF-NEXT: v_readlane_b32 s30, v40, 1 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; use s34 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 3 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -559,23 +559,23 @@ define void @callee_saved_sgpr_func() #2 { ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 3 ; FLATSCR-NEXT: s_add_i32 s32, s32, 
16 -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 2 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_mov_b32 s34, s40 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 1 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s34 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 3 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -626,13 +626,13 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v41, s4, 3 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_writelane_b32 v41, s30, 0 -; MUBUF-NEXT: v_writelane_b32 v41, s31, 1 +; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: v_writelane_b32 v41, s34, 0 +; MUBUF-NEXT: v_writelane_b32 v41, s30, 1 +; MUBUF-NEXT: v_writelane_b32 v41, s31, 2 ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v41, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND @@ -648,9 +648,9 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; MUBUF-NEXT: ; 
use v40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: v_readlane_b32 s34, v41, 2 -; MUBUF-NEXT: v_readlane_b32 s31, v41, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v41, 0 +; MUBUF-NEXT: v_readlane_b32 s30, v41, 1 +; MUBUF-NEXT: v_readlane_b32 s31, v41, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v41, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v41, 3 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -670,13 +670,13 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v41, s0, 3 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_writelane_b32 v41, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v41, s31, 1 +; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: v_writelane_b32 v41, s34, 0 +; FLATSCR-NEXT: v_writelane_b32 v41, s30, 1 +; FLATSCR-NEXT: v_writelane_b32 v41, s31, 2 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v41, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND @@ -692,9 +692,9 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; FLATSCR-NEXT: ; use v40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: v_readlane_b32 s34, v41, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v41, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v41, 0 +; FLATSCR-NEXT: v_readlane_b32 s30, v41, 1 +; FLATSCR-NEXT: v_readlane_b32 s31, v41, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v41, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v41, 3 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll 
b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index e7254eb5c3465..eb3ef69848a88 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -122,18 +122,18 @@ define void @callee_with_stack_and_call() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[18:19] ; MUBUF-NEXT: v_writelane_b32 v40, s16, 2 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -152,18 +152,18 @@ define void @callee_with_stack_and_call() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_swappc_b64 s[30:31], 
s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -194,15 +194,15 @@ define void @callee_no_stack_with_call() #0 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[18:19] ; MUBUF-NEXT: v_writelane_b32 v40, s16, 2 -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: v_readlane_b32 s4, v40, 2 ; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -221,15 +221,15 @@ define void @callee_no_stack_with_call() #0 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] ; FLATSCR-NEXT: v_writelane_b32 v40, s0, 2 -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: 
v_readlane_b32 s0, v40, 2 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 @@ -359,24 +359,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 -; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s38, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s39, 5 -; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6 -; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7 -; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9 -; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10 -; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s54, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s55, 13 -; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14 -; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15 -; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17 +; FLATSCR-NEXT: v_writelane_b32 v40, s36, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s37, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s38, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s39, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s48, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s49, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s50, 6 +; FLATSCR-NEXT: v_writelane_b32 v40, s51, 7 +; FLATSCR-NEXT: v_writelane_b32 v40, s52, 8 +; FLATSCR-NEXT: v_writelane_b32 v40, s53, 9 +; FLATSCR-NEXT: v_writelane_b32 v40, s54, 10 +; FLATSCR-NEXT: v_writelane_b32 v40, s55, 11 +; FLATSCR-NEXT: v_writelane_b32 v40, s64, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s65, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s66, 14 +; FLATSCR-NEXT: v_writelane_b32 v40, s67, 15 +; FLATSCR-NEXT: v_writelane_b32 v40, s30, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 17 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; 
FLATSCR-NEXT: ;;#ASMSTART @@ -414,6 +414,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[16:31] ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 16 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[72:79] ; FLATSCR-NEXT: ;;#ASMEND @@ -423,24 +424,23 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[0:15] ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17 -; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16 -; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15 -; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s55, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s54, v40, 12 -; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11 -; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10 -; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9 -; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8 -; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7 -; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s39, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s38, v40, 4 -; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v40, 17 +; FLATSCR-NEXT: v_readlane_b32 s67, v40, 15 +; FLATSCR-NEXT: v_readlane_b32 s66, v40, 14 +; FLATSCR-NEXT: v_readlane_b32 s65, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s64, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s55, v40, 11 +; FLATSCR-NEXT: v_readlane_b32 s54, v40, 10 +; FLATSCR-NEXT: v_readlane_b32 s53, v40, 9 +; FLATSCR-NEXT: v_readlane_b32 s52, v40, 8 +; FLATSCR-NEXT: v_readlane_b32 s51, v40, 7 +; FLATSCR-NEXT: v_readlane_b32 s50, v40, 6 +; FLATSCR-NEXT: v_readlane_b32 s49, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s48, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s39, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s38, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s37, v40, 1 +; 
FLATSCR-NEXT: v_readlane_b32 s36, v40, 0 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] @@ -489,15 +489,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) @@ -508,15 +508,15 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s0, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) @@ -537,6 +537,8 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: 
s_addk_i32 s32, 0x400 +; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s48, 0 ; MUBUF-NEXT: v_writelane_b32 v1, s49, 1 ; MUBUF-NEXT: v_writelane_b32 v1, s50, 2 @@ -566,19 +568,17 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: v_writelane_b32 v1, s98, 26 ; MUBUF-NEXT: v_writelane_b32 v1, s99, 27 ; MUBUF-NEXT: v_writelane_b32 v1, s100, 28 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s101, 29 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 30 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 30 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_readlane_b32 s102, v1, 30 ; MUBUF-NEXT: v_readlane_b32 s101, v1, 29 ; MUBUF-NEXT: v_readlane_b32 s100, v1, 28 @@ -626,6 +626,8 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s48, 0 ; FLATSCR-NEXT: v_writelane_b32 v1, s49, 1 ; FLATSCR-NEXT: v_writelane_b32 v1, s50, 2 @@ -655,19 +657,17 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: v_writelane_b32 v1, s98, 26 ; FLATSCR-NEXT: v_writelane_b32 v1, s99, 27 ; FLATSCR-NEXT: v_writelane_b32 v1, s100, 28 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s101, 29 +; 
FLATSCR-NEXT: v_writelane_b32 v1, s102, 30 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 30 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_readlane_b32 s102, v1, 30 ; FLATSCR-NEXT: v_readlane_b32 s101, v1, 29 ; FLATSCR-NEXT: v_readlane_b32 s100, v1, 28 @@ -731,6 +731,8 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v1, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v1, s49, 2 @@ -761,19 +763,17 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: v_writelane_b32 v1, s98, 27 ; MUBUF-NEXT: v_writelane_b32 v1, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v1, s100, 29 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: v_writelane_b32 v1, s101, 30 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 31 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_readlane_b32 s102, v1, 31 ; MUBUF-NEXT: v_readlane_b32 s101, v1, 30 ; MUBUF-NEXT: v_readlane_b32 s100, v1, 
29 @@ -822,6 +822,8 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v1, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v1, s49, 2 @@ -852,19 +854,17 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: v_writelane_b32 v1, s98, 27 ; FLATSCR-NEXT: v_writelane_b32 v1, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v1, s100, 29 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 -; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: v_writelane_b32 v1, s101, 30 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_readlane_b32 s102, v1, 31 ; FLATSCR-NEXT: v_readlane_b32 s101, v1, 30 ; FLATSCR-NEXT: v_readlane_b32 s100, v1, 29 @@ -970,15 +970,15 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v1, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; 
MUBUF-NEXT: v_readlane_b32 s30, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -996,15 +996,15 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload @@ -1036,18 +1036,18 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_addk_i32 s32, 0x300 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved initial VGPRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -1065,18 
+1065,18 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 -; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s32, s32, 12 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 +; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved initial VGPRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload @@ -1116,20 +1116,20 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s6 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s30, 0 -; MUBUF-NEXT: v_mov_b32_e32 v0, 0 -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 ; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 ; MUBUF-NEXT: v_writelane_b32 v40, s31, 1 +; MUBUF-NEXT: v_mov_b32_e32 v0, 0 +; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved VGPRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 @@ -1148,21 +1148,21 @@ define void 
@scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v40, s2 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 +; FLATSCR-NEXT: s_addk_i32 s32, 0x100c +; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved VGPRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 @@ -1210,18 +1210,18 @@ define void @ipra_call_with_stack() #0 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[16:17] -; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: v_writelane_b32 v1, s30, 0 +; MUBUF-NEXT: s_addk_i32 s32, 0x400 +; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: s_getpc_b64 s[16:17] ; MUBUF-NEXT: s_add_u32 s16, s16, local_empty_func@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s17, s17, local_empty_func@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v1, s31, 1 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[16:17] -; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s31, v1, 1 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 
s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload @@ -1238,18 +1238,18 @@ define void @ipra_call_with_stack() #0 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: v_writelane_b32 v1, s30, 0 +; FLATSCR-NEXT: s_add_i32 s32, s32, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, local_empty_func@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, local_empty_func@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v1, s31, 1 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] -; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s31, v1, 1 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload @@ -1319,6 +1319,7 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 @@ -1350,7 +1351,6 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 -; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1407,6 +1407,7 @@ define void 
@callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 @@ -1438,7 +1439,6 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 -; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1519,6 +1519,7 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v40, s4, 32 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v40, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v40, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v40, s49, 2 @@ -1550,7 +1551,6 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-NEXT: v_writelane_b32 v40, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v40, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v40, s101, 30 -; MUBUF-NEXT: s_addk_i32 s32, 0x200 ; MUBUF-NEXT: v_writelane_b32 v40, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1607,6 +1607,7 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v40, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v40, s49, 2 @@ -1638,7 +1639,6 @@ define void 
@callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: v_writelane_b32 v40, s99, 28 ; FLATSCR-NEXT: v_writelane_b32 v40, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v40, s101, 30 -; FLATSCR-NEXT: s_add_i32 s32, s32, 8 ; FLATSCR-NEXT: v_writelane_b32 v40, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs @@ -1718,6 +1718,7 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] ; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 ; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 ; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 ; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 @@ -1749,10 +1750,9 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 ; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 ; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 -; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 -; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART @@ -1812,6 +1812,7 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] +; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 ; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 ; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 @@ -1841,12 +1842,11 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26 ; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27 ; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 
-; FLATSCR-NEXT: s_addk_i32 s32, 0x100c ; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 ; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: scratch_store_dword off, v0, s1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll index fccee3da6d77e..7abde5b74367d 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll @@ -420,18 +420,18 @@ define void @func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -453,18 +453,18 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 
0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -486,18 +486,18 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -939,8 +939,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 
exec, s[6:7] -; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s4, 2 +; GFX7-NEXT: v_writelane_b32 v40, s30, 0 +; GFX7-NEXT: s_addk_i32 s32, 0x400 +; GFX7-NEXT: v_writelane_b32 v40, s31, 1 ; GFX7-NEXT: s_getpc_b64 s[4:5] ; GFX7-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX7-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -948,7 +950,6 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: flat_store_dword v[0:1], v0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, 0x140 -; GFX7-NEXT: v_writelane_b32 v40, s30, 0 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX7-NEXT: v_mov_b32_e32 v0, 10 ; GFX7-NEXT: v_mov_b32_e32 v1, 20 @@ -981,11 +982,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX7-NEXT: v_mov_b32_e32 v28, 0x122 ; GFX7-NEXT: v_mov_b32_e32 v29, 0x12c ; GFX7-NEXT: v_mov_b32_e32 v30, 0x136 -; GFX7-NEXT: v_writelane_b32 v40, s31, 1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1003,8 +1003,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] -; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s4, 2 +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_getpc_b64 s[4:5] ; GFX90A-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -1012,7 
+1014,6 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: global_store_dword v[0:1], v0, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, 0x140 -; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX90A-NEXT: v_mov_b32_e32 v0, 10 ; GFX90A-NEXT: v_mov_b32_e32 v1, 20 @@ -1045,11 +1046,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v28, 0x122 ; GFX90A-NEXT: v_mov_b32_e32 v29, 0x12c ; GFX90A-NEXT: v_mov_b32_e32 v30, 0x136 -; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-NEXT: s_mov_b32 s32, s33 ; GFX90A-NEXT: v_readlane_b32 s4, v40, 2 ; GFX90A-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1081,21 +1081,21 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: 
v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1396,19 +1396,20 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 20 ; GCN-NEXT: v_mov_b32_e32 v2, 30 ; GCN-NEXT: v_mov_b32_e32 v3, 40 @@ -1439,14 +1440,13 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: 
s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index bb2f06bfe83f8..718140f82887e 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -265,18 +265,18 @@ define void @func_indirect_use_workitem_id_x() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -298,18 +298,18 @@ define void @func_indirect_use_workitem_id_y() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_y@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_y@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; 
GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -331,18 +331,18 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, use_workitem_id_z@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, use_workitem_id_z@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -651,8 +651,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 @@ -660,7 +662,6 @@ 
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: flat_store_dword v[0:1], v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: v_mov_b32_e32 v1, 20 @@ -693,11 +694,10 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -729,21 +729,21 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: 
v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -970,19 +970,20 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 +; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_mov_b32_e32 v0, 0x3e7 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 0x140 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 -; GCN-NEXT: v_writelane_b32 v40, s4, 2 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, too_many_args_use_workitem_id_x_byval@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, too_many_args_use_workitem_id_x_byval@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 20 ; GCN-NEXT: v_mov_b32_e32 v2, 30 ; GCN-NEXT: v_mov_b32_e32 v3, 40 @@ -1013,14 +1014,13 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0x122 ; GCN-NEXT: v_mov_b32_e32 v29, 0x12c ; GCN-NEXT: v_mov_b32_e32 v30, 0x136 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GCN-NEXT: v_mov_b32_e32 v0, 10 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1461,16 +1461,16 @@ define void 
@func_call_no_workitem_id_hints() #2 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_hint@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_hint@rel32@hi+12 ; GCN-NEXT: v_mov_b32_e32 v0, 9 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 38c20c7cf62d6..9335cc304c294 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -33,15 +33,15 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -69,15 
+69,15 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -105,15 +105,15 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -141,15 +141,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; 
GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index 6504f48333485..209ac8e811456 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -15,6 +15,12 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll index ed609f85918f9..20077fa5d96a7 100644 --- a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll +++ b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll @@ -8,6 +8,8 @@ define amdgpu_kernel void @_Z3fooPiiii(ptr addrspace(1) nocapture noundef writeo ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 ; CHECK-NEXT: 
.file 1 "." "a.h" ; CHECK-NEXT: .loc 1 5 12 prologue_end ; ./a.h:5:12 @[ a.hip:12:8 ] ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll new file mode 100644 index 0000000000000..c3c93c1b606ec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -0,0 +1,3551 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=0 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-DIS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=1 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-EN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +define protected amdgpu_kernel void @kern1() #0 { +; CHECK-LABEL: kern1: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 +; CHECK-NEXT: s_endpgm +entry: + ret void +} + +define hidden void @func_no_clobber() #0 { +; CHECK-LABEL: func_no_clobber: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + ret void +} + +define void @callee_need_to_spill_fp_to_memory() #1 { +; GFX900-LABEL: callee_need_to_spill_fp_to_memory: +; GFX900: .Lfunc_begin2: +; GFX900-NEXT: .cfi_startproc +; 
GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: .cfi_undefined 2560 +; GFX900-NEXT: .cfi_undefined 2561 +; GFX900-NEXT: .cfi_undefined 2562 +; GFX900-NEXT: .cfi_undefined 2563 +; GFX900-NEXT: .cfi_undefined 2564 +; GFX900-NEXT: .cfi_undefined 2565 +; GFX900-NEXT: .cfi_undefined 2566 +; GFX900-NEXT: .cfi_undefined 2567 +; GFX900-NEXT: .cfi_undefined 2568 +; GFX900-NEXT: .cfi_undefined 2569 +; GFX900-NEXT: .cfi_undefined 2570 +; GFX900-NEXT: .cfi_undefined 2571 +; GFX900-NEXT: .cfi_undefined 2572 +; GFX900-NEXT: .cfi_undefined 2573 +; GFX900-NEXT: .cfi_undefined 2574 +; GFX900-NEXT: .cfi_undefined 2575 +; GFX900-NEXT: .cfi_undefined 2576 +; GFX900-NEXT: .cfi_undefined 2577 +; GFX900-NEXT: .cfi_undefined 2578 +; GFX900-NEXT: .cfi_undefined 2579 +; GFX900-NEXT: .cfi_undefined 2580 +; GFX900-NEXT: .cfi_undefined 2581 +; GFX900-NEXT: .cfi_undefined 2582 +; GFX900-NEXT: .cfi_undefined 2583 +; GFX900-NEXT: .cfi_undefined 2584 +; GFX900-NEXT: .cfi_undefined 2585 +; GFX900-NEXT: .cfi_undefined 2586 +; GFX900-NEXT: .cfi_undefined 2587 +; GFX900-NEXT: .cfi_undefined 2588 +; GFX900-NEXT: .cfi_undefined 2589 +; GFX900-NEXT: .cfi_undefined 2590 +; GFX900-NEXT: .cfi_undefined 2591 +; GFX900-NEXT: .cfi_undefined 2592 +; GFX900-NEXT: .cfi_undefined 2593 +; GFX900-NEXT: .cfi_undefined 2594 +; GFX900-NEXT: .cfi_undefined 2595 +; GFX900-NEXT: .cfi_undefined 2596 +; GFX900-NEXT: .cfi_undefined 2597 +; GFX900-NEXT: .cfi_undefined 2598 +; GFX900-NEXT: .cfi_undefined 2599 +; GFX900-NEXT: .cfi_undefined 2608 +; GFX900-NEXT: .cfi_undefined 2609 +; GFX900-NEXT: .cfi_undefined 2610 +; GFX900-NEXT: .cfi_undefined 2611 +; GFX900-NEXT: .cfi_undefined 2612 +; GFX900-NEXT: .cfi_undefined 2613 +; GFX900-NEXT: .cfi_undefined 2614 +; GFX900-NEXT: .cfi_undefined 2615 +; GFX900-NEXT: .cfi_undefined 2624 +; GFX900-NEXT: .cfi_undefined 2625 +; GFX900-NEXT: .cfi_undefined 2626 +; GFX900-NEXT: .cfi_undefined 
2627 +; GFX900-NEXT: .cfi_undefined 2628 +; GFX900-NEXT: .cfi_undefined 2629 +; GFX900-NEXT: .cfi_undefined 2630 +; GFX900-NEXT: .cfi_undefined 2631 +; GFX900-NEXT: .cfi_undefined 2640 +; GFX900-NEXT: .cfi_undefined 2641 +; GFX900-NEXT: .cfi_undefined 2642 +; GFX900-NEXT: .cfi_undefined 2643 +; GFX900-NEXT: .cfi_undefined 2644 +; GFX900-NEXT: .cfi_undefined 2645 +; GFX900-NEXT: .cfi_undefined 2646 +; GFX900-NEXT: .cfi_undefined 2647 +; GFX900-NEXT: .cfi_undefined 2656 +; GFX900-NEXT: .cfi_undefined 2657 +; GFX900-NEXT: .cfi_undefined 2658 +; GFX900-NEXT: .cfi_undefined 2659 +; GFX900-NEXT: .cfi_undefined 2660 +; GFX900-NEXT: .cfi_undefined 2661 +; GFX900-NEXT: .cfi_undefined 2662 +; GFX900-NEXT: .cfi_undefined 2663 +; GFX900-NEXT: .cfi_undefined 2672 +; GFX900-NEXT: .cfi_undefined 2673 +; GFX900-NEXT: .cfi_undefined 2674 +; GFX900-NEXT: .cfi_undefined 2675 +; GFX900-NEXT: .cfi_undefined 2676 +; GFX900-NEXT: .cfi_undefined 2677 +; GFX900-NEXT: .cfi_undefined 2678 +; GFX900-NEXT: .cfi_undefined 2679 +; GFX900-NEXT: .cfi_undefined 2688 +; GFX900-NEXT: .cfi_undefined 2689 +; GFX900-NEXT: .cfi_undefined 2690 +; GFX900-NEXT: .cfi_undefined 2691 +; GFX900-NEXT: .cfi_undefined 2692 +; GFX900-NEXT: .cfi_undefined 2693 +; GFX900-NEXT: .cfi_undefined 2694 +; GFX900-NEXT: .cfi_undefined 2695 +; GFX900-NEXT: .cfi_undefined 2704 +; GFX900-NEXT: .cfi_undefined 2705 +; GFX900-NEXT: .cfi_undefined 2706 +; GFX900-NEXT: .cfi_undefined 2707 +; GFX900-NEXT: .cfi_undefined 2708 +; GFX900-NEXT: .cfi_undefined 2709 +; GFX900-NEXT: .cfi_undefined 2710 +; GFX900-NEXT: .cfi_undefined 2711 +; GFX900-NEXT: .cfi_undefined 2720 +; GFX900-NEXT: .cfi_undefined 2721 +; GFX900-NEXT: .cfi_undefined 2722 +; GFX900-NEXT: .cfi_undefined 2723 +; GFX900-NEXT: .cfi_undefined 2724 +; GFX900-NEXT: .cfi_undefined 2725 +; GFX900-NEXT: .cfi_undefined 2726 +; GFX900-NEXT: .cfi_undefined 2727 +; GFX900-NEXT: .cfi_undefined 2736 +; GFX900-NEXT: .cfi_undefined 2737 +; GFX900-NEXT: .cfi_undefined 2738 +; 
GFX900-NEXT: .cfi_undefined 2739 +; GFX900-NEXT: .cfi_undefined 2740 +; GFX900-NEXT: .cfi_undefined 2741 +; GFX900-NEXT: .cfi_undefined 2742 +; GFX900-NEXT: .cfi_undefined 2743 +; GFX900-NEXT: .cfi_undefined 2752 +; GFX900-NEXT: .cfi_undefined 2753 +; GFX900-NEXT: .cfi_undefined 2754 +; GFX900-NEXT: .cfi_undefined 2755 +; GFX900-NEXT: .cfi_undefined 2756 +; GFX900-NEXT: .cfi_undefined 2757 +; GFX900-NEXT: .cfi_undefined 2758 +; GFX900-NEXT: .cfi_undefined 2759 +; GFX900-NEXT: .cfi_undefined 2768 +; GFX900-NEXT: .cfi_undefined 2769 +; GFX900-NEXT: .cfi_undefined 2770 +; GFX900-NEXT: .cfi_undefined 2771 +; GFX900-NEXT: .cfi_undefined 2772 +; GFX900-NEXT: .cfi_undefined 2773 +; GFX900-NEXT: .cfi_undefined 2774 +; GFX900-NEXT: .cfi_undefined 2775 +; GFX900-NEXT: .cfi_undefined 2784 +; GFX900-NEXT: .cfi_undefined 2785 +; GFX900-NEXT: .cfi_undefined 2786 +; GFX900-NEXT: .cfi_undefined 2787 +; GFX900-NEXT: .cfi_undefined 2788 +; GFX900-NEXT: .cfi_undefined 2789 +; GFX900-NEXT: .cfi_undefined 2790 +; GFX900-NEXT: .cfi_undefined 2791 +; GFX900-NEXT: .cfi_undefined 2800 +; GFX900-NEXT: .cfi_undefined 2801 +; GFX900-NEXT: .cfi_undefined 2802 +; GFX900-NEXT: .cfi_undefined 2803 +; GFX900-NEXT: .cfi_undefined 2804 +; GFX900-NEXT: .cfi_undefined 2805 +; GFX900-NEXT: .cfi_undefined 2806 +; GFX900-NEXT: .cfi_undefined 2807 +; GFX900-NEXT: .cfi_undefined 36 +; GFX900-NEXT: .cfi_undefined 37 +; GFX900-NEXT: .cfi_undefined 38 +; GFX900-NEXT: .cfi_undefined 39 +; GFX900-NEXT: .cfi_undefined 40 +; GFX900-NEXT: .cfi_undefined 41 +; GFX900-NEXT: .cfi_undefined 42 +; GFX900-NEXT: .cfi_undefined 43 +; GFX900-NEXT: .cfi_undefined 44 +; GFX900-NEXT: .cfi_undefined 45 +; GFX900-NEXT: .cfi_undefined 46 +; GFX900-NEXT: .cfi_undefined 47 +; GFX900-NEXT: .cfi_undefined 48 +; GFX900-NEXT: .cfi_undefined 49 +; GFX900-NEXT: .cfi_undefined 50 +; GFX900-NEXT: .cfi_undefined 51 +; GFX900-NEXT: .cfi_undefined 52 +; GFX900-NEXT: .cfi_undefined 53 +; GFX900-NEXT: .cfi_undefined 54 +; GFX900-NEXT: 
.cfi_undefined 55 +; GFX900-NEXT: .cfi_undefined 56 +; GFX900-NEXT: .cfi_undefined 57 +; GFX900-NEXT: .cfi_undefined 58 +; GFX900-NEXT: .cfi_undefined 59 +; GFX900-NEXT: .cfi_undefined 60 +; GFX900-NEXT: .cfi_undefined 61 +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s40, s33 +; GFX900-NEXT: .cfi_register 65, 72 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: .cfi_def_cfa_register 65 +; GFX900-NEXT: s_addk_i32 s32, 0x7100 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416 +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 28160 +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2602, 32, 17, 64, 27904 +; GFX900-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 27648 +; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 27392 +; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 27136 +; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 26880 +; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2607, 32, 17, 64, 26624 +; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 26368 +; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 64, 
26112 +; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 25856 +; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 25600 +; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 25344 +; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 25088 +; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2622, 32, 17, 64, 24832 +; GFX900-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 24576 +; GFX900-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 24320 +; GFX900-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 24064 +; GFX900-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 23808 +; GFX900-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 23552 +; GFX900-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 64, 23296 +; GFX900-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 23040 +; GFX900-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 
64, 22784 +; GFX900-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 22528 +; GFX900-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 22272 +; GFX900-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 22016 +; GFX900-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 21760 +; GFX900-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 21504 +; GFX900-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 21248 +; GFX900-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 20992 +; GFX900-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 20736 +; GFX900-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 20480 +; GFX900-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 +; GFX900-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 +; GFX900-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 +; GFX900-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2667, 
32, 17, 64, 19456 +; GFX900-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 +; GFX900-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 +; GFX900-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 +; GFX900-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 +; GFX900-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 +; GFX900-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 +; GFX900-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 +; GFX900-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 +; GFX900-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 +; GFX900-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 +; GFX900-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 +; GFX900-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 +; GFX900-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX900-NEXT: 
.cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 +; GFX900-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 +; GFX900-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 +; GFX900-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 +; GFX900-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 +; GFX900-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 +; GFX900-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 +; GFX900-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 +; GFX900-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 +; GFX900-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 +; GFX900-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 +; GFX900-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 +; GFX900-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 +; GFX900-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill 
+; GFX900-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 +; GFX900-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 +; GFX900-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 +; GFX900-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 +; GFX900-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 +; GFX900-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 +; GFX900-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 +; GFX900-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 +; GFX900-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 +; GFX900-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 +; GFX900-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 +; GFX900-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 +; GFX900-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 +; GFX900-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte 
Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 +; GFX900-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 +; GFX900-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 +; GFX900-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 +; GFX900-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 +; GFX900-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 +; GFX900-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 +; GFX900-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 +; GFX900-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 +; GFX900-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 +; GFX900-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 +; GFX900-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 +; GFX900-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 +; GFX900-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 
4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 +; GFX900-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 +; GFX900-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 +; GFX900-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 +; GFX900-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 +; GFX900-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 +; GFX900-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 +; GFX900-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 +; GFX900-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 +; GFX900-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 +; GFX900-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 +; GFX900-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 +; GFX900-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 +; GFX900-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte 
Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 +; GFX900-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 +; GFX900-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 +; GFX900-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 +; GFX900-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 +; GFX900-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 +; GFX900-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 +; GFX900-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 +; GFX900-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 +; GFX900-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 +; GFX900-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 +; GFX900-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber nonpreserved SGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber all VGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte 
Folded Reload +; GFX900-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; 
GFX900-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; 
GFX900-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload 
+; GFX900-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; 
GFX900-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX900-NEXT: 
buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: .cfi_def_cfa_register 64 +; GFX900-NEXT: s_mov_b32 s33, s40 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-DIS: .Lfunc_begin2: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 
2592 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2677 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2770 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 36 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 37 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 38 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 39 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 40 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 41 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 42 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 43 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 44 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 45 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 46 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 47 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 48 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 49 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 50 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 51 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 52 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 53 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 54 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 55 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 56 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 57 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 58 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 59 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 60 +; GFX90A-V2A-DIS-NEXT: 
.cfi_undefined 61 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-DIS-NEXT: .cfi_register 65, 72 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 28416 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 28160 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2602, 32, 17, 64, 27904 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2603, 32, 17, 64, 27648 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2604, 32, 17, 64, 27392 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2605, 32, 17, 64, 27136 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2606, 32, 17, 64, 26880 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2607, 32, 17, 64, 26624 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2616, 32, 17, 64, 26368 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2617, 32, 17, 
64, 26112 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2618, 32, 17, 64, 25856 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2619, 32, 17, 64, 25600 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2620, 32, 17, 64, 25344 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2621, 32, 17, 64, 25088 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2622, 32, 17, 64, 24832 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2623, 32, 17, 64, 24576 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2632, 32, 17, 64, 24320 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2633, 32, 17, 64, 24064 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2634, 32, 17, 64, 23808 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2635, 32, 17, 64, 23552 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2636, 32, 17, 64, 23296 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; 
GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2637, 32, 17, 64, 23040 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2638, 32, 17, 64, 22784 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2639, 32, 17, 64, 22528 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2648, 32, 17, 64, 22272 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2649, 32, 17, 64, 22016 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2650, 32, 17, 64, 21760 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2651, 32, 17, 64, 21504 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2652, 32, 17, 64, 21248 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2653, 32, 17, 64, 20992 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2654, 32, 17, 64, 20736 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2655, 32, 17, 64, 20480 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v105, off, 
s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 
+; GFX90A-V2A-DIS-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; 
GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v172, 
off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 +; 
GFX90A-V2A-DIS-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: 
.cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded 
Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber all VGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v252, off, s[0:3], s33 
offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; 
GFX90A-V2A-DIS-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: 
buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword 
v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v91, off, s[0:3], s33 
offset:336 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; 
GFX90A-V2A-DIS-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 64 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-EN: .Lfunc_begin2: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 2578 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2656 
+; GFX90A-V2A-EN-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2742 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3076 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3081 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 3085 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3088 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 36 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 37 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 38 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 39 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 40 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 41 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 42 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 43 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 44 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 45 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 46 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 47 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 48 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 49 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 50 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 51 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 52 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 53 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 54 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 55 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 56 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 57 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 58 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 59 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 60 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 61 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-EN-NEXT: 
.cfi_register 65, 72 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x5100 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2602, 3074, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2603, 3075, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2604, 3076, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2605, 3077, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2606, 3078, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2607, 3079, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2616, 3080, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2617, 3081, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2618, 3082, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2619, 3083, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 
2620, 3084, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2621, 3085, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2622, 3086, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2623, 3087, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a16, v72 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2632, 3088, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a17, v73 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2633, 3089, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a18, v74 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2634, 3090, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a19, v75 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2635, 3091, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a20, v76 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2636, 3092, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a21, v77 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2637, 3093, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a22, v78 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2638, 3094, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a23, v79 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2639, 3095, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a24, v88 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2648, 3096, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a25, v89 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2649, 3097, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a26, v90 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: 
.cfi_llvm_vector_register_mask 2650, 3098, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a27, v91 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2651, 3099, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a28, v92 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2652, 3100, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a29, v93 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2653, 3101, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a30, v94 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2654, 3102, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a31, v95 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2655, 3103, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2664, 32, 17, 64, 20224 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2665, 32, 17, 64, 19968 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2666, 32, 17, 64, 19712 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2667, 32, 17, 64, 19456 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2668, 32, 17, 64, 19200 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2669, 32, 17, 64, 18944 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2670, 32, 17, 64, 18688 +; GFX90A-V2A-EN-NEXT: buffer_store_dword 
v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2671, 32, 17, 64, 18432 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2680, 32, 17, 64, 18176 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2681, 32, 17, 64, 17920 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2682, 32, 17, 64, 17664 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2683, 32, 17, 64, 17408 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2684, 32, 17, 64, 17152 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2685, 32, 17, 64, 16896 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2686, 32, 17, 64, 16640 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2687, 32, 17, 64, 16384 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2696, 32, 17, 64, 16128 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2697, 32, 17, 64, 15872 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2698, 32, 17, 64, 15616 +; 
GFX90A-V2A-EN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2699, 32, 17, 64, 15360 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2700, 32, 17, 64, 15104 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2701, 32, 17, 64, 14848 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2702, 32, 17, 64, 14592 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2703, 32, 17, 64, 14336 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2712, 32, 17, 64, 14080 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2713, 32, 17, 64, 13824 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2714, 32, 17, 64, 13568 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2715, 32, 17, 64, 13312 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2716, 32, 17, 64, 13056 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2717, 32, 17, 64, 12800 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: 
.cfi_llvm_vector_offset 2718, 32, 17, 64, 12544 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2719, 32, 17, 64, 12288 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2728, 32, 17, 64, 12032 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2729, 32, 17, 64, 11776 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2730, 32, 17, 64, 11520 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2731, 32, 17, 64, 11264 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2732, 32, 17, 64, 11008 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2733, 32, 17, 64, 10752 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2734, 32, 17, 64, 10496 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2735, 32, 17, 64, 10240 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2744, 32, 17, 64, 9984 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2745, 32, 17, 64, 9728 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded 
Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2746, 32, 17, 64, 9472 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2747, 32, 17, 64, 9216 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2748, 32, 17, 64, 8960 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2749, 32, 17, 64, 8704 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2750, 32, 17, 64, 8448 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2751, 32, 17, 64, 8192 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2760, 32, 17, 64, 7936 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2761, 32, 17, 64, 7680 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2762, 32, 17, 64, 7424 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2763, 32, 17, 64, 7168 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2764, 32, 17, 64, 6912 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2765, 32, 17, 64, 6656 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 
; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2766, 32, 17, 64, 6400 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2767, 32, 17, 64, 6144 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2776, 32, 17, 64, 5888 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2777, 32, 17, 64, 5632 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2778, 32, 17, 64, 5376 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2779, 32, 17, 64, 5120 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2780, 32, 17, 64, 4864 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2781, 32, 17, 64, 4608 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2782, 32, 17, 64, 4352 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2783, 32, 17, 64, 4096 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2792, 32, 17, 64, 3840 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2793, 32, 17, 64, 3584 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v234, off, s[0:3], s33 
offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2794, 32, 17, 64, 3328 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2795, 32, 17, 64, 3072 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2796, 32, 17, 64, 2816 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2797, 32, 17, 64, 2560 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2798, 32, 17, 64, 2304 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2799, 32, 17, 64, 2048 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2808, 32, 17, 64, 1792 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2809, 32, 17, 64, 1536 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2810, 32, 17, 64, 1280 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2811, 32, 17, 64, 1024 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2812, 32, 17, 64, 768 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2813, 32, 17, 64, 512 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v254, off, s[0:3], 
s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2814, 32, 17, 64, 256 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_offset 2815, 32, 17, 64, 0 +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber all VGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v232, off, s[0:3], s33 
offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; 
GFX90A-V2A-EN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword 
v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 
4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v95, a31 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v94, a30 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v93, a29 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v92, a28 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v91, a27 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v90, a26 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v89, a25 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v88, a24 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v79, a23 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v78, a22 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v77, a21 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v76, a20 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v75, a19 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v74, a18 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v73, a17 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v72, a16 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: 
v_accvgpr_read_b32 v46, a6 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 64 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: callee_need_to_spill_fp_to_memory: +; WAVE32: .Lfunc_begin2: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: 
.cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 
+; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 +; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 +; WAVE32-NEXT: .cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: .cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: .cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: 
.cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: .cfi_undefined 36 +; WAVE32-NEXT: .cfi_undefined 37 +; WAVE32-NEXT: .cfi_undefined 38 +; WAVE32-NEXT: .cfi_undefined 39 +; WAVE32-NEXT: .cfi_undefined 40 +; WAVE32-NEXT: .cfi_undefined 41 +; WAVE32-NEXT: .cfi_undefined 42 +; WAVE32-NEXT: .cfi_undefined 43 +; WAVE32-NEXT: .cfi_undefined 44 +; WAVE32-NEXT: .cfi_undefined 45 +; WAVE32-NEXT: .cfi_undefined 46 +; WAVE32-NEXT: .cfi_undefined 47 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: .cfi_undefined 50 +; WAVE32-NEXT: .cfi_undefined 51 +; WAVE32-NEXT: .cfi_undefined 52 +; WAVE32-NEXT: .cfi_undefined 53 +; WAVE32-NEXT: .cfi_undefined 54 +; WAVE32-NEXT: .cfi_undefined 55 +; WAVE32-NEXT: .cfi_undefined 56 +; WAVE32-NEXT: .cfi_undefined 57 +; WAVE32-NEXT: .cfi_undefined 58 +; WAVE32-NEXT: .cfi_undefined 59 +; WAVE32-NEXT: .cfi_undefined 60 +; WAVE32-NEXT: .cfi_undefined 61 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s40, s33 +; WAVE32-NEXT: .cfi_register 65, 72 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: s_addk_i32 s32, 0x3880 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 14208 +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 14080 +; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1578, 32, 1, 32, 13952 +; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1579, 32, 1, 32, 13824 +; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1580, 32, 1, 32, 13696 +; WAVE32-NEXT: 
buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1581, 32, 1, 32, 13568 +; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1582, 32, 1, 32, 13440 +; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1583, 32, 1, 32, 13312 +; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1592, 32, 1, 32, 13184 +; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1593, 32, 1, 32, 13056 +; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1594, 32, 1, 32, 12928 +; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1595, 32, 1, 32, 12800 +; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1596, 32, 1, 32, 12672 +; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1597, 32, 1, 32, 12544 +; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1598, 32, 1, 32, 12416 +; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1599, 32, 1, 32, 12288 +; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1608, 32, 1, 32, 12160 +; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1609, 32, 1, 32, 12032 +; WAVE32-NEXT: 
buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1610, 32, 1, 32, 11904 +; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1611, 32, 1, 32, 11776 +; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1612, 32, 1, 32, 11648 +; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1613, 32, 1, 32, 11520 +; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1614, 32, 1, 32, 11392 +; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1615, 32, 1, 32, 11264 +; WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1624, 32, 1, 32, 11136 +; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1625, 32, 1, 32, 11008 +; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1626, 32, 1, 32, 10880 +; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1627, 32, 1, 32, 10752 +; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1628, 32, 1, 32, 10624 +; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1629, 32, 1, 32, 10496 +; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1630, 32, 1, 32, 10368 +; WAVE32-NEXT: 
buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1631, 32, 1, 32, 10240 +; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1640, 32, 1, 32, 10112 +; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1641, 32, 1, 32, 9984 +; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1642, 32, 1, 32, 9856 +; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1643, 32, 1, 32, 9728 +; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1644, 32, 1, 32, 9600 +; WAVE32-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1645, 32, 1, 32, 9472 +; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1646, 32, 1, 32, 9344 +; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1647, 32, 1, 32, 9216 +; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1656, 32, 1, 32, 9088 +; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1657, 32, 1, 32, 8960 +; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1658, 32, 1, 32, 8832 +; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1659, 32, 1, 32, 8704 +; WAVE32-NEXT: 
buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1660, 32, 1, 32, 8576 +; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1661, 32, 1, 32, 8448 +; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1662, 32, 1, 32, 8320 +; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1663, 32, 1, 32, 8192 +; WAVE32-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1672, 32, 1, 32, 8064 +; WAVE32-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1673, 32, 1, 32, 7936 +; WAVE32-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1674, 32, 1, 32, 7808 +; WAVE32-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1675, 32, 1, 32, 7680 +; WAVE32-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1676, 32, 1, 32, 7552 +; WAVE32-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1677, 32, 1, 32, 7424 +; WAVE32-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1678, 32, 1, 32, 7296 +; WAVE32-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1679, 32, 1, 32, 7168 +; WAVE32-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1688, 32, 1, 32, 7040 +; WAVE32-NEXT: 
buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1689, 32, 1, 32, 6912 +; WAVE32-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1690, 32, 1, 32, 6784 +; WAVE32-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1691, 32, 1, 32, 6656 +; WAVE32-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1692, 32, 1, 32, 6528 +; WAVE32-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1693, 32, 1, 32, 6400 +; WAVE32-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1694, 32, 1, 32, 6272 +; WAVE32-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1695, 32, 1, 32, 6144 +; WAVE32-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1704, 32, 1, 32, 6016 +; WAVE32-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1705, 32, 1, 32, 5888 +; WAVE32-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1706, 32, 1, 32, 5760 +; WAVE32-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1707, 32, 1, 32, 5632 +; WAVE32-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1708, 32, 1, 32, 5504 +; WAVE32-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1709, 32, 1, 32, 5376 +; WAVE32-NEXT: 
buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1710, 32, 1, 32, 5248 +; WAVE32-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1711, 32, 1, 32, 5120 +; WAVE32-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1720, 32, 1, 32, 4992 +; WAVE32-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1721, 32, 1, 32, 4864 +; WAVE32-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1722, 32, 1, 32, 4736 +; WAVE32-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1723, 32, 1, 32, 4608 +; WAVE32-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1724, 32, 1, 32, 4480 +; WAVE32-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1725, 32, 1, 32, 4352 +; WAVE32-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1726, 32, 1, 32, 4224 +; WAVE32-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1727, 32, 1, 32, 4096 +; WAVE32-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1736, 32, 1, 32, 3968 +; WAVE32-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1737, 32, 1, 32, 3840 +; WAVE32-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1738, 32, 1, 32, 3712 +; WAVE32-NEXT: 
buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1739, 32, 1, 32, 3584 +; WAVE32-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1740, 32, 1, 32, 3456 +; WAVE32-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1741, 32, 1, 32, 3328 +; WAVE32-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1742, 32, 1, 32, 3200 +; WAVE32-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1743, 32, 1, 32, 3072 +; WAVE32-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1752, 32, 1, 32, 2944 +; WAVE32-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1753, 32, 1, 32, 2816 +; WAVE32-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1754, 32, 1, 32, 2688 +; WAVE32-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1755, 32, 1, 32, 2560 +; WAVE32-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1756, 32, 1, 32, 2432 +; WAVE32-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1757, 32, 1, 32, 2304 +; WAVE32-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1758, 32, 1, 32, 2176 +; WAVE32-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1759, 32, 1, 32, 2048 +; WAVE32-NEXT: buffer_store_dword 
v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1768, 32, 1, 32, 1920 +; WAVE32-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1769, 32, 1, 32, 1792 +; WAVE32-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1770, 32, 1, 32, 1664 +; WAVE32-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1771, 32, 1, 32, 1536 +; WAVE32-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1772, 32, 1, 32, 1408 +; WAVE32-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1773, 32, 1, 32, 1280 +; WAVE32-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1774, 32, 1, 32, 1152 +; WAVE32-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1775, 32, 1, 32, 1024 +; WAVE32-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1784, 32, 1, 32, 896 +; WAVE32-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1785, 32, 1, 32, 768 +; WAVE32-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1786, 32, 1, 32, 640 +; WAVE32-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1787, 32, 1, 32, 512 +; WAVE32-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1788, 32, 1, 32, 384 +; WAVE32-NEXT: buffer_store_dword v253, off, s[0:3], s33 
offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1789, 32, 1, 32, 256 +; WAVE32-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1790, 32, 1, 32, 128 +; WAVE32-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1791, 32, 1, 32, 0 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber nonpreserved SGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber all VGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x3e +; WAVE32-NEXT: buffer_load_dword v255, off, s[0:3], s33 +; WAVE32-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 +; WAVE32-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 +; WAVE32-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 +; WAVE32-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 +; WAVE32-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 +; WAVE32-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 +; WAVE32-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 +; WAVE32-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 +; WAVE32-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 +; WAVE32-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 +; WAVE32-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 +; WAVE32-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 +; WAVE32-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 +; WAVE32-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 +; WAVE32-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 +; WAVE32-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 +; WAVE32-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 +; WAVE32-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 +; WAVE32-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 +; WAVE32-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 +; 
WAVE32-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 +; WAVE32-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 +; WAVE32-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 +; WAVE32-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 +; WAVE32-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 +; WAVE32-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 +; WAVE32-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 +; WAVE32-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 +; WAVE32-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 +; WAVE32-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 +; WAVE32-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 +; WAVE32-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 +; WAVE32-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 +; WAVE32-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 +; WAVE32-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 +; WAVE32-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 +; WAVE32-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 +; WAVE32-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 +; WAVE32-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 +; WAVE32-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 +; WAVE32-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 +; WAVE32-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 +; WAVE32-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 +; WAVE32-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 +; WAVE32-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 +; WAVE32-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 +; WAVE32-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 +; WAVE32-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 +; WAVE32-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 +; WAVE32-NEXT: buffer_load_dword 
v157, off, s[0:3], s33 offset:200 +; WAVE32-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 +; WAVE32-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 +; WAVE32-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 +; WAVE32-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 +; WAVE32-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 +; WAVE32-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 +; WAVE32-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 +; WAVE32-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 +; WAVE32-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 +; WAVE32-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 +; WAVE32-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 +; WAVE32-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 +; WAVE32-NEXT: s_clause 0x30 +; WAVE32-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 +; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 +; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 +; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 +; WAVE32-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 +; WAVE32-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 +; WAVE32-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 +; WAVE32-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 +; WAVE32-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 +; WAVE32-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 +; WAVE32-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 +; WAVE32-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 +; WAVE32-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 +; WAVE32-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 +; WAVE32-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 +; WAVE32-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 +; WAVE32-NEXT: 
buffer_load_dword v104, off, s[0:3], s33 offset:316 +; WAVE32-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 +; WAVE32-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 +; WAVE32-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 +; WAVE32-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 +; WAVE32-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 +; WAVE32-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 +; WAVE32-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 +; WAVE32-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 +; WAVE32-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 +; WAVE32-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 +; WAVE32-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 +; WAVE32-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 +; WAVE32-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 +; WAVE32-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 +; WAVE32-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 +; WAVE32-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 +; WAVE32-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 +; WAVE32-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 +; WAVE32-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 +; WAVE32-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 +; WAVE32-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 +; WAVE32-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 +; WAVE32-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 +; WAVE32-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 +; WAVE32-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 +; WAVE32-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 +; WAVE32-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 +; WAVE32-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 +; WAVE32-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 +; 
WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: .cfi_def_cfa_register 64 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 s33, s40 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber nonpreserved SGPRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + 
,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"() + ret void +} + +declare hidden void @ex() #0 + +define hidden void @func_call_clobber() #0 { +; GFX900-LABEL: func_call_clobber: +; GFX900: .Lfunc_begin3: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: .cfi_undefined 2560 +; GFX900-NEXT: .cfi_undefined 2561 +; GFX900-NEXT: .cfi_undefined 2562 +; GFX900-NEXT: .cfi_undefined 2563 +; GFX900-NEXT: .cfi_undefined 2564 +; GFX900-NEXT: .cfi_undefined 2565 +; GFX900-NEXT: .cfi_undefined 2566 +; GFX900-NEXT: .cfi_undefined 2567 +; GFX900-NEXT: .cfi_undefined 2568 +; GFX900-NEXT: .cfi_undefined 2569 +; GFX900-NEXT: .cfi_undefined 2570 +; GFX900-NEXT: .cfi_undefined 2571 +; GFX900-NEXT: .cfi_undefined 2572 +; GFX900-NEXT: .cfi_undefined 2573 +; GFX900-NEXT: .cfi_undefined 2574 +; GFX900-NEXT: .cfi_undefined 2575 +; GFX900-NEXT: .cfi_undefined 2576 +; GFX900-NEXT: .cfi_undefined 2577 +; GFX900-NEXT: .cfi_undefined 2578 +; GFX900-NEXT: .cfi_undefined 2579 +; GFX900-NEXT: .cfi_undefined 2580 +; GFX900-NEXT: .cfi_undefined 2581 +; GFX900-NEXT: .cfi_undefined 2582 +; GFX900-NEXT: .cfi_undefined 2583 +; 
GFX900-NEXT: .cfi_undefined 2584 +; GFX900-NEXT: .cfi_undefined 2585 +; GFX900-NEXT: .cfi_undefined 2586 +; GFX900-NEXT: .cfi_undefined 2587 +; GFX900-NEXT: .cfi_undefined 2588 +; GFX900-NEXT: .cfi_undefined 2589 +; GFX900-NEXT: .cfi_undefined 2590 +; GFX900-NEXT: .cfi_undefined 2591 +; GFX900-NEXT: .cfi_undefined 2592 +; GFX900-NEXT: .cfi_undefined 2593 +; GFX900-NEXT: .cfi_undefined 2594 +; GFX900-NEXT: .cfi_undefined 2595 +; GFX900-NEXT: .cfi_undefined 2596 +; GFX900-NEXT: .cfi_undefined 2597 +; GFX900-NEXT: .cfi_undefined 2598 +; GFX900-NEXT: .cfi_undefined 2599 +; GFX900-NEXT: .cfi_undefined 2608 +; GFX900-NEXT: .cfi_undefined 2609 +; GFX900-NEXT: .cfi_undefined 2610 +; GFX900-NEXT: .cfi_undefined 2611 +; GFX900-NEXT: .cfi_undefined 2612 +; GFX900-NEXT: .cfi_undefined 2613 +; GFX900-NEXT: .cfi_undefined 2614 +; GFX900-NEXT: .cfi_undefined 2615 +; GFX900-NEXT: .cfi_undefined 2624 +; GFX900-NEXT: .cfi_undefined 2625 +; GFX900-NEXT: .cfi_undefined 2626 +; GFX900-NEXT: .cfi_undefined 2627 +; GFX900-NEXT: .cfi_undefined 2628 +; GFX900-NEXT: .cfi_undefined 2629 +; GFX900-NEXT: .cfi_undefined 2630 +; GFX900-NEXT: .cfi_undefined 2631 +; GFX900-NEXT: .cfi_undefined 2640 +; GFX900-NEXT: .cfi_undefined 2641 +; GFX900-NEXT: .cfi_undefined 2642 +; GFX900-NEXT: .cfi_undefined 2643 +; GFX900-NEXT: .cfi_undefined 2644 +; GFX900-NEXT: .cfi_undefined 2645 +; GFX900-NEXT: .cfi_undefined 2646 +; GFX900-NEXT: .cfi_undefined 2647 +; GFX900-NEXT: .cfi_undefined 2656 +; GFX900-NEXT: .cfi_undefined 2657 +; GFX900-NEXT: .cfi_undefined 2658 +; GFX900-NEXT: .cfi_undefined 2659 +; GFX900-NEXT: .cfi_undefined 2660 +; GFX900-NEXT: .cfi_undefined 2661 +; GFX900-NEXT: .cfi_undefined 2662 +; GFX900-NEXT: .cfi_undefined 2663 +; GFX900-NEXT: .cfi_undefined 2672 +; GFX900-NEXT: .cfi_undefined 2673 +; GFX900-NEXT: .cfi_undefined 2674 +; GFX900-NEXT: .cfi_undefined 2675 +; GFX900-NEXT: .cfi_undefined 2676 +; GFX900-NEXT: .cfi_undefined 2677 +; GFX900-NEXT: .cfi_undefined 2678 +; GFX900-NEXT: 
.cfi_undefined 2679 +; GFX900-NEXT: .cfi_undefined 2688 +; GFX900-NEXT: .cfi_undefined 2689 +; GFX900-NEXT: .cfi_undefined 2690 +; GFX900-NEXT: .cfi_undefined 2691 +; GFX900-NEXT: .cfi_undefined 2692 +; GFX900-NEXT: .cfi_undefined 2693 +; GFX900-NEXT: .cfi_undefined 2694 +; GFX900-NEXT: .cfi_undefined 2695 +; GFX900-NEXT: .cfi_undefined 2704 +; GFX900-NEXT: .cfi_undefined 2705 +; GFX900-NEXT: .cfi_undefined 2706 +; GFX900-NEXT: .cfi_undefined 2707 +; GFX900-NEXT: .cfi_undefined 2708 +; GFX900-NEXT: .cfi_undefined 2709 +; GFX900-NEXT: .cfi_undefined 2710 +; GFX900-NEXT: .cfi_undefined 2711 +; GFX900-NEXT: .cfi_undefined 2720 +; GFX900-NEXT: .cfi_undefined 2721 +; GFX900-NEXT: .cfi_undefined 2722 +; GFX900-NEXT: .cfi_undefined 2723 +; GFX900-NEXT: .cfi_undefined 2724 +; GFX900-NEXT: .cfi_undefined 2725 +; GFX900-NEXT: .cfi_undefined 2726 +; GFX900-NEXT: .cfi_undefined 2727 +; GFX900-NEXT: .cfi_undefined 2736 +; GFX900-NEXT: .cfi_undefined 2737 +; GFX900-NEXT: .cfi_undefined 2738 +; GFX900-NEXT: .cfi_undefined 2739 +; GFX900-NEXT: .cfi_undefined 2740 +; GFX900-NEXT: .cfi_undefined 2741 +; GFX900-NEXT: .cfi_undefined 2742 +; GFX900-NEXT: .cfi_undefined 2743 +; GFX900-NEXT: .cfi_undefined 2752 +; GFX900-NEXT: .cfi_undefined 2753 +; GFX900-NEXT: .cfi_undefined 2754 +; GFX900-NEXT: .cfi_undefined 2755 +; GFX900-NEXT: .cfi_undefined 2756 +; GFX900-NEXT: .cfi_undefined 2757 +; GFX900-NEXT: .cfi_undefined 2758 +; GFX900-NEXT: .cfi_undefined 2759 +; GFX900-NEXT: .cfi_undefined 2768 +; GFX900-NEXT: .cfi_undefined 2769 +; GFX900-NEXT: .cfi_undefined 2770 +; GFX900-NEXT: .cfi_undefined 2771 +; GFX900-NEXT: .cfi_undefined 2772 +; GFX900-NEXT: .cfi_undefined 2773 +; GFX900-NEXT: .cfi_undefined 2774 +; GFX900-NEXT: .cfi_undefined 2775 +; GFX900-NEXT: .cfi_undefined 2784 +; GFX900-NEXT: .cfi_undefined 2785 +; GFX900-NEXT: .cfi_undefined 2786 +; GFX900-NEXT: .cfi_undefined 2787 +; GFX900-NEXT: .cfi_undefined 2788 +; GFX900-NEXT: .cfi_undefined 2789 +; GFX900-NEXT: .cfi_undefined 2790 
+; GFX900-NEXT: .cfi_undefined 2791 +; GFX900-NEXT: .cfi_undefined 2800 +; GFX900-NEXT: .cfi_undefined 2801 +; GFX900-NEXT: .cfi_undefined 2802 +; GFX900-NEXT: .cfi_undefined 2803 +; GFX900-NEXT: .cfi_undefined 2804 +; GFX900-NEXT: .cfi_undefined 2805 +; GFX900-NEXT: .cfi_undefined 2806 +; GFX900-NEXT: .cfi_undefined 2807 +; GFX900-NEXT: .cfi_undefined 32 +; GFX900-NEXT: .cfi_undefined 33 +; GFX900-NEXT: .cfi_undefined 34 +; GFX900-NEXT: .cfi_undefined 35 +; GFX900-NEXT: .cfi_undefined 36 +; GFX900-NEXT: .cfi_undefined 37 +; GFX900-NEXT: .cfi_undefined 38 +; GFX900-NEXT: .cfi_undefined 39 +; GFX900-NEXT: .cfi_undefined 40 +; GFX900-NEXT: .cfi_undefined 41 +; GFX900-NEXT: .cfi_undefined 42 +; GFX900-NEXT: .cfi_undefined 43 +; GFX900-NEXT: .cfi_undefined 44 +; GFX900-NEXT: .cfi_undefined 45 +; GFX900-NEXT: .cfi_undefined 46 +; GFX900-NEXT: .cfi_undefined 47 +; GFX900-NEXT: .cfi_undefined 48 +; GFX900-NEXT: .cfi_undefined 49 +; GFX900-NEXT: .cfi_undefined 50 +; GFX900-NEXT: .cfi_undefined 51 +; GFX900-NEXT: .cfi_undefined 52 +; GFX900-NEXT: .cfi_undefined 53 +; GFX900-NEXT: .cfi_undefined 54 +; GFX900-NEXT: .cfi_undefined 55 +; GFX900-NEXT: .cfi_undefined 56 +; GFX900-NEXT: .cfi_undefined 57 +; GFX900-NEXT: .cfi_undefined 58 +; GFX900-NEXT: .cfi_undefined 59 +; GFX900-NEXT: .cfi_undefined 60 +; GFX900-NEXT: .cfi_undefined 61 +; GFX900-NEXT: .cfi_undefined 72 +; GFX900-NEXT: .cfi_undefined 73 +; GFX900-NEXT: .cfi_undefined 74 +; GFX900-NEXT: .cfi_undefined 75 +; GFX900-NEXT: .cfi_undefined 76 +; GFX900-NEXT: .cfi_undefined 77 +; GFX900-NEXT: .cfi_undefined 78 +; GFX900-NEXT: .cfi_undefined 79 +; GFX900-NEXT: .cfi_undefined 88 +; GFX900-NEXT: .cfi_undefined 89 +; GFX900-NEXT: .cfi_undefined 90 +; GFX900-NEXT: .cfi_undefined 91 +; GFX900-NEXT: .cfi_undefined 92 +; GFX900-NEXT: .cfi_undefined 93 +; GFX900-NEXT: .cfi_undefined 94 +; GFX900-NEXT: .cfi_undefined 95 +; GFX900-NEXT: .cfi_undefined 1096 +; GFX900-NEXT: .cfi_undefined 1097 +; GFX900-NEXT: .cfi_undefined 1098 +; 
GFX900-NEXT: .cfi_undefined 1099 +; GFX900-NEXT: .cfi_undefined 1100 +; GFX900-NEXT: .cfi_undefined 1101 +; GFX900-NEXT: .cfi_undefined 1102 +; GFX900-NEXT: .cfi_undefined 1103 +; GFX900-NEXT: .cfi_undefined 1112 +; GFX900-NEXT: .cfi_undefined 1113 +; GFX900-NEXT: .cfi_undefined 1114 +; GFX900-NEXT: .cfi_undefined 1115 +; GFX900-NEXT: .cfi_undefined 1116 +; GFX900-NEXT: .cfi_undefined 1117 +; GFX900-NEXT: .cfi_undefined 1118 +; GFX900-NEXT: .cfi_undefined 1119 +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s16, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_offset 2600, 0 +; GFX900-NEXT: s_mov_b64 exec, s[18:19] +; GFX900-NEXT: v_writelane_b32 v40, s16, 2 +; GFX900-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX900-NEXT: .cfi_def_cfa_register 65 +; GFX900-NEXT: v_writelane_b32 v40, s30, 0 +; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v40, s31, 1 +; GFX900-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s30, v40, 0 +; GFX900-NEXT: v_readlane_b32 s31, v40, 1 +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: v_readlane_b32 s4, v40, 2 +; GFX900-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: .cfi_def_cfa_register 64 +; GFX900-NEXT: s_mov_b32 s33, s4 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_call_clobber: +; GFX90A-V2A-DIS: .Lfunc_begin3: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: 
.cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2597 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2610 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2691 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2695 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2785 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2788 +; 
GFX90A-V2A-DIS-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3076 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3081 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3085 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3088 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 32 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 33 +; GFX90A-V2A-DIS-NEXT: 
.cfi_undefined 34 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 35 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 36 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 37 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 38 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 39 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 40 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 41 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 42 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 43 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 44 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 45 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 46 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 47 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 48 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 49 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 50 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 51 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 52 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 53 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 54 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 55 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 56 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 57 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 58 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 59 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 60 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 61 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 72 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 73 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 74 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 75 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 76 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 77 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 78 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 79 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 88 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 89 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 90 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 91 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 92 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 93 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 94 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 95 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1096 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1097 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1098 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1099 
+; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1100 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1101 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1102 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1103 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1112 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1113 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1114 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1115 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1116 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1117 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1118 +; GFX90A-V2A-DIS-NEXT: .cfi_undefined 1119 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_offset 2600, 0 +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 +; GFX90A-V2A-DIS-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-DIS-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-DIS-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-DIS-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-DIS-NEXT: .cfi_def_cfa_register 64 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, 
s4 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_call_clobber: +; GFX90A-V2A-EN: .Lfunc_begin3: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2562 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2563 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2564 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2565 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2566 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2567 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2568 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2569 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2570 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2571 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2572 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2573 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2574 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2575 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2576 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2577 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2578 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2579 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2580 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2581 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2582 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2583 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2584 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2585 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2586 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2587 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2588 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2589 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2590 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2591 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2592 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2593 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2594 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2595 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2596 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 2597 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2598 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2599 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2608 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2609 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2610 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2611 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2612 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2613 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2614 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2615 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2624 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2625 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2626 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2627 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2628 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2629 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2630 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2631 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2640 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2641 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2642 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2643 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2644 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2645 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2646 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2647 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2656 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2657 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2658 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2659 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2660 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2661 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2662 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2663 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2672 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2673 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2674 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2675 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2676 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2677 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2678 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2679 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2688 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2689 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2690 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2691 
+; GFX90A-V2A-EN-NEXT: .cfi_undefined 2692 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2693 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2694 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2695 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2704 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2705 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2706 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2707 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2708 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2709 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2710 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2711 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2720 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2721 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2722 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2723 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2724 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2725 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2726 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2727 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2736 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2737 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2738 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2739 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2740 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2741 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2742 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2743 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2752 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2753 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2754 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2755 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2756 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2757 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2758 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2759 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2768 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2769 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2770 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2771 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2772 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2773 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2774 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2775 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2784 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2785 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 2786 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2787 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2788 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2789 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2790 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2791 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2800 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2801 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2802 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2803 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2804 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2805 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2806 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2807 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3074 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3075 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3076 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3077 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3078 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3079 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3080 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3081 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3082 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3083 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3084 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3085 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3086 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3087 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3088 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3089 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3090 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3091 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3092 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3093 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3094 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3095 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3098 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3103 +; GFX90A-V2A-EN-NEXT: 
.cfi_undefined 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 33 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 34 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 35 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 36 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 37 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 38 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 39 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 40 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 41 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 42 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 43 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 44 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 45 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 46 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 47 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 48 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 49 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 50 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 51 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 52 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 53 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 54 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 55 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 56 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 57 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 58 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 59 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 60 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 61 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 72 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 73 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 74 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 75 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 76 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 77 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 78 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 79 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 88 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 89 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 90 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 91 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 92 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 93 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 94 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 95 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1096 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1097 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1098 +; 
GFX90A-V2A-EN-NEXT: .cfi_undefined 1099 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1100 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1101 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1102 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1103 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1112 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1113 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1114 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1115 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1116 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1117 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1118 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 1119 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: .cfi_offset 2600, 0 +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 65 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_registers 16, 2815, 0, 32, 2815, 1, 32 +; GFX90A-V2A-EN-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-EN-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-EN-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-EN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-EN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-EN-NEXT: .cfi_def_cfa_register 64 +; GFX90A-V2A-EN-NEXT: s_mov_b32 
s33, s4 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_call_clobber: +; WAVE32: .Lfunc_begin3: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: .cfi_undefined 1536 +; WAVE32-NEXT: .cfi_undefined 1537 +; WAVE32-NEXT: .cfi_undefined 1538 +; WAVE32-NEXT: .cfi_undefined 1539 +; WAVE32-NEXT: .cfi_undefined 1540 +; WAVE32-NEXT: .cfi_undefined 1541 +; WAVE32-NEXT: .cfi_undefined 1542 +; WAVE32-NEXT: .cfi_undefined 1543 +; WAVE32-NEXT: .cfi_undefined 1544 +; WAVE32-NEXT: .cfi_undefined 1545 +; WAVE32-NEXT: .cfi_undefined 1546 +; WAVE32-NEXT: .cfi_undefined 1547 +; WAVE32-NEXT: .cfi_undefined 1548 +; WAVE32-NEXT: .cfi_undefined 1549 +; WAVE32-NEXT: .cfi_undefined 1550 +; WAVE32-NEXT: .cfi_undefined 1551 +; WAVE32-NEXT: .cfi_undefined 1552 +; WAVE32-NEXT: .cfi_undefined 1553 +; WAVE32-NEXT: .cfi_undefined 1554 +; WAVE32-NEXT: .cfi_undefined 1555 +; WAVE32-NEXT: .cfi_undefined 1556 +; WAVE32-NEXT: .cfi_undefined 1557 +; WAVE32-NEXT: .cfi_undefined 1558 +; WAVE32-NEXT: .cfi_undefined 1559 +; WAVE32-NEXT: .cfi_undefined 1560 +; WAVE32-NEXT: .cfi_undefined 1561 +; WAVE32-NEXT: .cfi_undefined 1562 +; WAVE32-NEXT: .cfi_undefined 1563 +; WAVE32-NEXT: .cfi_undefined 1564 +; WAVE32-NEXT: .cfi_undefined 1565 +; WAVE32-NEXT: .cfi_undefined 1566 +; WAVE32-NEXT: .cfi_undefined 1567 +; WAVE32-NEXT: .cfi_undefined 1568 +; WAVE32-NEXT: .cfi_undefined 1569 +; WAVE32-NEXT: .cfi_undefined 1570 +; WAVE32-NEXT: .cfi_undefined 1571 +; WAVE32-NEXT: .cfi_undefined 1572 +; WAVE32-NEXT: .cfi_undefined 1573 +; WAVE32-NEXT: .cfi_undefined 1574 +; WAVE32-NEXT: .cfi_undefined 1575 +; WAVE32-NEXT: .cfi_undefined 1584 +; WAVE32-NEXT: .cfi_undefined 1585 +; WAVE32-NEXT: .cfi_undefined 1586 +; WAVE32-NEXT: .cfi_undefined 1587 +; WAVE32-NEXT: .cfi_undefined 1588 +; WAVE32-NEXT: .cfi_undefined 1589 +; 
WAVE32-NEXT: .cfi_undefined 1590 +; WAVE32-NEXT: .cfi_undefined 1591 +; WAVE32-NEXT: .cfi_undefined 1600 +; WAVE32-NEXT: .cfi_undefined 1601 +; WAVE32-NEXT: .cfi_undefined 1602 +; WAVE32-NEXT: .cfi_undefined 1603 +; WAVE32-NEXT: .cfi_undefined 1604 +; WAVE32-NEXT: .cfi_undefined 1605 +; WAVE32-NEXT: .cfi_undefined 1606 +; WAVE32-NEXT: .cfi_undefined 1607 +; WAVE32-NEXT: .cfi_undefined 1616 +; WAVE32-NEXT: .cfi_undefined 1617 +; WAVE32-NEXT: .cfi_undefined 1618 +; WAVE32-NEXT: .cfi_undefined 1619 +; WAVE32-NEXT: .cfi_undefined 1620 +; WAVE32-NEXT: .cfi_undefined 1621 +; WAVE32-NEXT: .cfi_undefined 1622 +; WAVE32-NEXT: .cfi_undefined 1623 +; WAVE32-NEXT: .cfi_undefined 1632 +; WAVE32-NEXT: .cfi_undefined 1633 +; WAVE32-NEXT: .cfi_undefined 1634 +; WAVE32-NEXT: .cfi_undefined 1635 +; WAVE32-NEXT: .cfi_undefined 1636 +; WAVE32-NEXT: .cfi_undefined 1637 +; WAVE32-NEXT: .cfi_undefined 1638 +; WAVE32-NEXT: .cfi_undefined 1639 +; WAVE32-NEXT: .cfi_undefined 1648 +; WAVE32-NEXT: .cfi_undefined 1649 +; WAVE32-NEXT: .cfi_undefined 1650 +; WAVE32-NEXT: .cfi_undefined 1651 +; WAVE32-NEXT: .cfi_undefined 1652 +; WAVE32-NEXT: .cfi_undefined 1653 +; WAVE32-NEXT: .cfi_undefined 1654 +; WAVE32-NEXT: .cfi_undefined 1655 +; WAVE32-NEXT: .cfi_undefined 1664 +; WAVE32-NEXT: .cfi_undefined 1665 +; WAVE32-NEXT: .cfi_undefined 1666 +; WAVE32-NEXT: .cfi_undefined 1667 +; WAVE32-NEXT: .cfi_undefined 1668 +; WAVE32-NEXT: .cfi_undefined 1669 +; WAVE32-NEXT: .cfi_undefined 1670 +; WAVE32-NEXT: .cfi_undefined 1671 +; WAVE32-NEXT: .cfi_undefined 1680 +; WAVE32-NEXT: .cfi_undefined 1681 +; WAVE32-NEXT: .cfi_undefined 1682 +; WAVE32-NEXT: .cfi_undefined 1683 +; WAVE32-NEXT: .cfi_undefined 1684 +; WAVE32-NEXT: .cfi_undefined 1685 +; WAVE32-NEXT: .cfi_undefined 1686 +; WAVE32-NEXT: .cfi_undefined 1687 +; WAVE32-NEXT: .cfi_undefined 1696 +; WAVE32-NEXT: .cfi_undefined 1697 +; WAVE32-NEXT: .cfi_undefined 1698 +; WAVE32-NEXT: .cfi_undefined 1699 +; WAVE32-NEXT: .cfi_undefined 1700 +; WAVE32-NEXT: 
.cfi_undefined 1701 +; WAVE32-NEXT: .cfi_undefined 1702 +; WAVE32-NEXT: .cfi_undefined 1703 +; WAVE32-NEXT: .cfi_undefined 1712 +; WAVE32-NEXT: .cfi_undefined 1713 +; WAVE32-NEXT: .cfi_undefined 1714 +; WAVE32-NEXT: .cfi_undefined 1715 +; WAVE32-NEXT: .cfi_undefined 1716 +; WAVE32-NEXT: .cfi_undefined 1717 +; WAVE32-NEXT: .cfi_undefined 1718 +; WAVE32-NEXT: .cfi_undefined 1719 +; WAVE32-NEXT: .cfi_undefined 1728 +; WAVE32-NEXT: .cfi_undefined 1729 +; WAVE32-NEXT: .cfi_undefined 1730 +; WAVE32-NEXT: .cfi_undefined 1731 +; WAVE32-NEXT: .cfi_undefined 1732 +; WAVE32-NEXT: .cfi_undefined 1733 +; WAVE32-NEXT: .cfi_undefined 1734 +; WAVE32-NEXT: .cfi_undefined 1735 +; WAVE32-NEXT: .cfi_undefined 1744 +; WAVE32-NEXT: .cfi_undefined 1745 +; WAVE32-NEXT: .cfi_undefined 1746 +; WAVE32-NEXT: .cfi_undefined 1747 +; WAVE32-NEXT: .cfi_undefined 1748 +; WAVE32-NEXT: .cfi_undefined 1749 +; WAVE32-NEXT: .cfi_undefined 1750 +; WAVE32-NEXT: .cfi_undefined 1751 +; WAVE32-NEXT: .cfi_undefined 1760 +; WAVE32-NEXT: .cfi_undefined 1761 +; WAVE32-NEXT: .cfi_undefined 1762 +; WAVE32-NEXT: .cfi_undefined 1763 +; WAVE32-NEXT: .cfi_undefined 1764 +; WAVE32-NEXT: .cfi_undefined 1765 +; WAVE32-NEXT: .cfi_undefined 1766 +; WAVE32-NEXT: .cfi_undefined 1767 +; WAVE32-NEXT: .cfi_undefined 1776 +; WAVE32-NEXT: .cfi_undefined 1777 +; WAVE32-NEXT: .cfi_undefined 1778 +; WAVE32-NEXT: .cfi_undefined 1779 +; WAVE32-NEXT: .cfi_undefined 1780 +; WAVE32-NEXT: .cfi_undefined 1781 +; WAVE32-NEXT: .cfi_undefined 1782 +; WAVE32-NEXT: .cfi_undefined 1783 +; WAVE32-NEXT: .cfi_undefined 32 +; WAVE32-NEXT: .cfi_undefined 33 +; WAVE32-NEXT: .cfi_undefined 34 +; WAVE32-NEXT: .cfi_undefined 35 +; WAVE32-NEXT: .cfi_undefined 36 +; WAVE32-NEXT: .cfi_undefined 37 +; WAVE32-NEXT: .cfi_undefined 38 +; WAVE32-NEXT: .cfi_undefined 39 +; WAVE32-NEXT: .cfi_undefined 40 +; WAVE32-NEXT: .cfi_undefined 41 +; WAVE32-NEXT: .cfi_undefined 42 +; WAVE32-NEXT: .cfi_undefined 43 +; WAVE32-NEXT: .cfi_undefined 44 +; WAVE32-NEXT: 
.cfi_undefined 45 +; WAVE32-NEXT: .cfi_undefined 46 +; WAVE32-NEXT: .cfi_undefined 47 +; WAVE32-NEXT: .cfi_undefined 48 +; WAVE32-NEXT: .cfi_undefined 49 +; WAVE32-NEXT: .cfi_undefined 50 +; WAVE32-NEXT: .cfi_undefined 51 +; WAVE32-NEXT: .cfi_undefined 52 +; WAVE32-NEXT: .cfi_undefined 53 +; WAVE32-NEXT: .cfi_undefined 54 +; WAVE32-NEXT: .cfi_undefined 55 +; WAVE32-NEXT: .cfi_undefined 56 +; WAVE32-NEXT: .cfi_undefined 57 +; WAVE32-NEXT: .cfi_undefined 58 +; WAVE32-NEXT: .cfi_undefined 59 +; WAVE32-NEXT: .cfi_undefined 60 +; WAVE32-NEXT: .cfi_undefined 61 +; WAVE32-NEXT: .cfi_undefined 72 +; WAVE32-NEXT: .cfi_undefined 73 +; WAVE32-NEXT: .cfi_undefined 74 +; WAVE32-NEXT: .cfi_undefined 75 +; WAVE32-NEXT: .cfi_undefined 76 +; WAVE32-NEXT: .cfi_undefined 77 +; WAVE32-NEXT: .cfi_undefined 78 +; WAVE32-NEXT: .cfi_undefined 79 +; WAVE32-NEXT: .cfi_undefined 88 +; WAVE32-NEXT: .cfi_undefined 89 +; WAVE32-NEXT: .cfi_undefined 90 +; WAVE32-NEXT: .cfi_undefined 91 +; WAVE32-NEXT: .cfi_undefined 92 +; WAVE32-NEXT: .cfi_undefined 93 +; WAVE32-NEXT: .cfi_undefined 94 +; WAVE32-NEXT: .cfi_undefined 95 +; WAVE32-NEXT: .cfi_undefined 1096 +; WAVE32-NEXT: .cfi_undefined 1097 +; WAVE32-NEXT: .cfi_undefined 1098 +; WAVE32-NEXT: .cfi_undefined 1099 +; WAVE32-NEXT: .cfi_undefined 1100 +; WAVE32-NEXT: .cfi_undefined 1101 +; WAVE32-NEXT: .cfi_undefined 1102 +; WAVE32-NEXT: .cfi_undefined 1103 +; WAVE32-NEXT: .cfi_undefined 1112 +; WAVE32-NEXT: .cfi_undefined 1113 +; WAVE32-NEXT: .cfi_undefined 1114 +; WAVE32-NEXT: .cfi_undefined 1115 +; WAVE32-NEXT: .cfi_undefined 1116 +; WAVE32-NEXT: .cfi_undefined 1117 +; WAVE32-NEXT: .cfi_undefined 1118 +; WAVE32-NEXT: .cfi_undefined 1119 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s16, s33 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: s_or_saveexec_b32 s17, -1 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_offset 1576, 0 +; WAVE32-NEXT: s_waitcnt_depctr 
0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s17 +; WAVE32-NEXT: v_writelane_b32 v40, s16, 2 +; WAVE32-NEXT: .cfi_llvm_vector_registers 65, 1576, 2, 32 +; WAVE32-NEXT: .cfi_def_cfa_register 65 +; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 +; WAVE32-NEXT: s_addk_i32 s32, 0x200 +; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 +; WAVE32-NEXT: .cfi_llvm_vector_registers 16, 1791, 0, 32, 1791, 1, 32 +; WAVE32-NEXT: s_getpc_b64 s[16:17] +; WAVE32-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; WAVE32-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; WAVE32-NEXT: s_swappc_b64 s[30:31], s[16:17] +; WAVE32-NEXT: v_readlane_b32 s30, v40, 0 +; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: v_readlane_b32 s4, v40, 2 +; WAVE32-NEXT: s_or_saveexec_b32 s5, -1 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s5 +; WAVE32-NEXT: .cfi_def_cfa_register 64 +; WAVE32-NEXT: s_mov_b32 s33, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void @ex() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_vmem() #0 { +; GFX900-LABEL: func_spill_vgpr_to_vmem: +; GFX900: .Lfunc_begin4: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-DIS: .Lfunc_begin4: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 768 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 512 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3104, 32, 17, 64, 256 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3105, 32, 17, 64, 0 +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], 
s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-EN: .Lfunc_begin4: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3104, 2560, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3105, 2561, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; 
GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_vmem: +; WAVE32: .Lfunc_begin4: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 128 +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 0 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void asm sideeffect "; clobber", "~{v40}"() #0 + call void asm sideeffect "; clobber", "~{v41}"() #0 + call void asm sideeffect "; clobber", "~{a32}"() #0 + call void asm sideeffect "; clobber", "~{a33}"() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_agpr() #2 { +; GFX900-LABEL: func_spill_vgpr_to_agpr: +; GFX900: .Lfunc_begin5: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX900-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: 
.cfi_llvm_vector_offset 2601, 32, 17, 64, 0 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-DIS: .Lfunc_begin5: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 768 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 2601, 32, 17, 64, 512 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3104, 32, 17, 64, 256 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: .cfi_llvm_vector_offset 3105, 32, 17, 64, 0 +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; 
GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-EN: .Lfunc_begin5: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GFX90A-V2A-EN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2560 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 2561 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3072 +; GFX90A-V2A-EN-NEXT: .cfi_undefined 3073 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 2601, 3073, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3104, 2560, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: .cfi_llvm_vector_register_mask 3105, 2561, 32, 17, 64 +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; 
GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_agpr: +; WAVE32: .Lfunc_begin5: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; WAVE32-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1576, 32, 1, 32, 128 +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: .cfi_llvm_vector_offset 1577, 32, 1, 32, 0 +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber", "~{v40}"() + call void asm sideeffect "; clobber", "~{v41}"() + call void asm sideeffect "; clobber", "~{a32}"() + call void asm sideeffect "; clobber", "~{a33}"() + ret void +} + + +; NOTE: Number of VGPRs available to kernel, and in turn number of corresponding CFIs generated, +; is dependent on waves/WG size. Since the intent here is to check whether we generate the correct +; CFIs, doing it for any one set of details is sufficient which also makes the test insensitive to +; changes in those details. 
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" "frame-pointer"="all" } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index 40cdfd76d6af6..bc928041ed750 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -15,29 +15,514 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 2566 +; CHECK-NEXT: .cfi_undefined 2567 +; CHECK-NEXT: .cfi_undefined 2568 +; CHECK-NEXT: .cfi_undefined 2569 +; CHECK-NEXT: .cfi_undefined 2570 +; CHECK-NEXT: .cfi_undefined 2571 +; CHECK-NEXT: .cfi_undefined 2572 +; CHECK-NEXT: .cfi_undefined 2573 +; CHECK-NEXT: .cfi_undefined 2574 +; CHECK-NEXT: .cfi_undefined 2575 +; CHECK-NEXT: .cfi_undefined 2576 +; CHECK-NEXT: .cfi_undefined 2577 +; CHECK-NEXT: .cfi_undefined 2578 +; CHECK-NEXT: .cfi_undefined 2579 +; CHECK-NEXT: .cfi_undefined 2580 +; CHECK-NEXT: .cfi_undefined 2581 +; CHECK-NEXT: .cfi_undefined 2582 +; CHECK-NEXT: .cfi_undefined 2583 +; CHECK-NEXT: .cfi_undefined 2584 +; 
CHECK-NEXT: .cfi_undefined 2585 +; CHECK-NEXT: .cfi_undefined 2586 +; CHECK-NEXT: .cfi_undefined 2587 +; CHECK-NEXT: .cfi_undefined 2588 +; CHECK-NEXT: .cfi_undefined 2589 +; CHECK-NEXT: .cfi_undefined 2590 +; CHECK-NEXT: .cfi_undefined 2591 +; CHECK-NEXT: .cfi_undefined 2592 +; CHECK-NEXT: .cfi_undefined 2593 +; CHECK-NEXT: .cfi_undefined 2594 +; CHECK-NEXT: .cfi_undefined 2595 +; CHECK-NEXT: .cfi_undefined 2596 +; CHECK-NEXT: .cfi_undefined 2597 +; CHECK-NEXT: .cfi_undefined 2598 +; CHECK-NEXT: .cfi_undefined 2599 +; CHECK-NEXT: .cfi_undefined 2608 +; CHECK-NEXT: .cfi_undefined 2609 +; CHECK-NEXT: .cfi_undefined 2610 +; CHECK-NEXT: .cfi_undefined 2611 +; CHECK-NEXT: .cfi_undefined 2612 +; CHECK-NEXT: .cfi_undefined 2613 +; CHECK-NEXT: .cfi_undefined 2614 +; CHECK-NEXT: .cfi_undefined 2615 +; CHECK-NEXT: .cfi_undefined 2624 +; CHECK-NEXT: .cfi_undefined 2625 +; CHECK-NEXT: .cfi_undefined 2626 +; CHECK-NEXT: .cfi_undefined 2627 +; CHECK-NEXT: .cfi_undefined 2628 +; CHECK-NEXT: .cfi_undefined 2629 +; CHECK-NEXT: .cfi_undefined 2630 +; CHECK-NEXT: .cfi_undefined 2631 +; CHECK-NEXT: .cfi_undefined 2640 +; CHECK-NEXT: .cfi_undefined 2641 +; CHECK-NEXT: .cfi_undefined 2642 +; CHECK-NEXT: .cfi_undefined 2643 +; CHECK-NEXT: .cfi_undefined 2644 +; CHECK-NEXT: .cfi_undefined 2645 +; CHECK-NEXT: .cfi_undefined 2646 +; CHECK-NEXT: .cfi_undefined 2647 +; CHECK-NEXT: .cfi_undefined 2656 +; CHECK-NEXT: .cfi_undefined 2657 +; CHECK-NEXT: .cfi_undefined 2658 +; CHECK-NEXT: .cfi_undefined 2659 +; CHECK-NEXT: .cfi_undefined 2660 +; CHECK-NEXT: .cfi_undefined 2661 +; CHECK-NEXT: .cfi_undefined 2662 +; CHECK-NEXT: .cfi_undefined 2663 +; CHECK-NEXT: .cfi_undefined 2672 +; CHECK-NEXT: .cfi_undefined 2673 +; CHECK-NEXT: .cfi_undefined 2674 +; CHECK-NEXT: .cfi_undefined 2675 +; CHECK-NEXT: .cfi_undefined 2676 +; CHECK-NEXT: .cfi_undefined 2677 +; CHECK-NEXT: .cfi_undefined 2678 +; CHECK-NEXT: .cfi_undefined 2679 +; CHECK-NEXT: .cfi_undefined 2688 +; CHECK-NEXT: .cfi_undefined 2689 +; 
CHECK-NEXT: .cfi_undefined 2690 +; CHECK-NEXT: .cfi_undefined 2691 +; CHECK-NEXT: .cfi_undefined 2692 +; CHECK-NEXT: .cfi_undefined 2693 +; CHECK-NEXT: .cfi_undefined 2694 +; CHECK-NEXT: .cfi_undefined 2695 +; CHECK-NEXT: .cfi_undefined 2704 +; CHECK-NEXT: .cfi_undefined 2705 +; CHECK-NEXT: .cfi_undefined 2706 +; CHECK-NEXT: .cfi_undefined 2707 +; CHECK-NEXT: .cfi_undefined 2708 +; CHECK-NEXT: .cfi_undefined 2709 +; CHECK-NEXT: .cfi_undefined 2710 +; CHECK-NEXT: .cfi_undefined 2711 +; CHECK-NEXT: .cfi_undefined 2720 +; CHECK-NEXT: .cfi_undefined 2721 +; CHECK-NEXT: .cfi_undefined 2722 +; CHECK-NEXT: .cfi_undefined 2723 +; CHECK-NEXT: .cfi_undefined 2724 +; CHECK-NEXT: .cfi_undefined 2725 +; CHECK-NEXT: .cfi_undefined 2726 +; CHECK-NEXT: .cfi_undefined 2727 +; CHECK-NEXT: .cfi_undefined 2736 +; CHECK-NEXT: .cfi_undefined 2737 +; CHECK-NEXT: .cfi_undefined 2738 +; CHECK-NEXT: .cfi_undefined 2739 +; CHECK-NEXT: .cfi_undefined 2740 +; CHECK-NEXT: .cfi_undefined 2741 +; CHECK-NEXT: .cfi_undefined 2742 +; CHECK-NEXT: .cfi_undefined 2743 +; CHECK-NEXT: .cfi_undefined 2752 +; CHECK-NEXT: .cfi_undefined 2753 +; CHECK-NEXT: .cfi_undefined 2754 +; CHECK-NEXT: .cfi_undefined 2755 +; CHECK-NEXT: .cfi_undefined 2756 +; CHECK-NEXT: .cfi_undefined 2757 +; CHECK-NEXT: .cfi_undefined 2758 +; CHECK-NEXT: .cfi_undefined 2759 +; CHECK-NEXT: .cfi_undefined 2768 +; CHECK-NEXT: .cfi_undefined 2769 +; CHECK-NEXT: .cfi_undefined 2770 +; CHECK-NEXT: .cfi_undefined 2771 +; CHECK-NEXT: .cfi_undefined 2772 +; CHECK-NEXT: .cfi_undefined 2773 +; CHECK-NEXT: .cfi_undefined 2774 +; CHECK-NEXT: .cfi_undefined 2775 +; CHECK-NEXT: .cfi_undefined 2784 +; CHECK-NEXT: .cfi_undefined 2785 +; CHECK-NEXT: .cfi_undefined 2786 +; CHECK-NEXT: .cfi_undefined 2787 +; CHECK-NEXT: .cfi_undefined 2788 +; CHECK-NEXT: .cfi_undefined 2789 +; CHECK-NEXT: .cfi_undefined 2790 +; CHECK-NEXT: .cfi_undefined 2791 +; CHECK-NEXT: .cfi_undefined 2800 +; CHECK-NEXT: .cfi_undefined 2801 +; CHECK-NEXT: .cfi_undefined 2802 +; 
CHECK-NEXT: .cfi_undefined 2803 +; CHECK-NEXT: .cfi_undefined 2804 +; CHECK-NEXT: .cfi_undefined 2805 +; CHECK-NEXT: .cfi_undefined 2806 +; CHECK-NEXT: .cfi_undefined 2807 +; CHECK-NEXT: .cfi_undefined 3072 +; CHECK-NEXT: .cfi_undefined 3073 +; CHECK-NEXT: .cfi_undefined 3074 +; CHECK-NEXT: .cfi_undefined 3075 +; CHECK-NEXT: .cfi_undefined 3076 +; CHECK-NEXT: .cfi_undefined 3077 +; CHECK-NEXT: .cfi_undefined 3078 +; CHECK-NEXT: .cfi_undefined 3079 +; CHECK-NEXT: .cfi_undefined 3080 +; CHECK-NEXT: .cfi_undefined 3081 +; CHECK-NEXT: .cfi_undefined 3082 +; CHECK-NEXT: .cfi_undefined 3083 +; CHECK-NEXT: .cfi_undefined 3084 +; CHECK-NEXT: .cfi_undefined 3085 +; CHECK-NEXT: .cfi_undefined 3086 +; CHECK-NEXT: .cfi_undefined 3087 +; CHECK-NEXT: .cfi_undefined 3088 +; CHECK-NEXT: .cfi_undefined 3089 +; CHECK-NEXT: .cfi_undefined 3090 +; CHECK-NEXT: .cfi_undefined 3091 +; CHECK-NEXT: .cfi_undefined 3092 +; CHECK-NEXT: .cfi_undefined 3093 +; CHECK-NEXT: .cfi_undefined 3094 +; CHECK-NEXT: .cfi_undefined 3095 +; CHECK-NEXT: .cfi_undefined 3096 +; CHECK-NEXT: .cfi_undefined 3097 +; CHECK-NEXT: .cfi_undefined 3098 +; CHECK-NEXT: .cfi_undefined 3099 +; CHECK-NEXT: .cfi_undefined 3100 +; CHECK-NEXT: .cfi_undefined 3101 +; CHECK-NEXT: .cfi_undefined 3102 +; CHECK-NEXT: .cfi_undefined 3103 +; CHECK-NEXT: .cfi_undefined 3104 +; CHECK-NEXT: .cfi_undefined 3105 +; CHECK-NEXT: .cfi_undefined 3106 +; CHECK-NEXT: .cfi_undefined 3107 +; CHECK-NEXT: .cfi_undefined 3108 +; CHECK-NEXT: .cfi_undefined 3109 +; CHECK-NEXT: .cfi_undefined 3110 +; CHECK-NEXT: .cfi_undefined 3111 +; CHECK-NEXT: .cfi_undefined 3112 +; CHECK-NEXT: .cfi_undefined 3113 +; CHECK-NEXT: .cfi_undefined 3114 +; CHECK-NEXT: .cfi_undefined 3115 +; CHECK-NEXT: .cfi_undefined 3116 +; CHECK-NEXT: .cfi_undefined 3117 +; CHECK-NEXT: .cfi_undefined 3118 +; CHECK-NEXT: .cfi_undefined 3119 +; CHECK-NEXT: .cfi_undefined 3120 +; CHECK-NEXT: .cfi_undefined 3121 +; CHECK-NEXT: .cfi_undefined 3122 +; CHECK-NEXT: .cfi_undefined 3123 +; 
CHECK-NEXT: .cfi_undefined 3124 +; CHECK-NEXT: .cfi_undefined 3125 +; CHECK-NEXT: .cfi_undefined 3126 +; CHECK-NEXT: .cfi_undefined 3127 +; CHECK-NEXT: .cfi_undefined 3128 +; CHECK-NEXT: .cfi_undefined 3129 +; CHECK-NEXT: .cfi_undefined 3130 +; CHECK-NEXT: .cfi_undefined 3131 +; CHECK-NEXT: .cfi_undefined 3132 +; CHECK-NEXT: .cfi_undefined 3133 +; CHECK-NEXT: .cfi_undefined 3134 +; CHECK-NEXT: .cfi_undefined 3135 +; CHECK-NEXT: .cfi_undefined 3136 +; CHECK-NEXT: .cfi_undefined 3137 +; CHECK-NEXT: .cfi_undefined 3138 +; CHECK-NEXT: .cfi_undefined 3139 +; CHECK-NEXT: .cfi_undefined 3140 +; CHECK-NEXT: .cfi_undefined 3141 +; CHECK-NEXT: .cfi_undefined 3142 +; CHECK-NEXT: .cfi_undefined 3143 +; CHECK-NEXT: .cfi_undefined 3144 +; CHECK-NEXT: .cfi_undefined 3145 +; CHECK-NEXT: .cfi_undefined 3146 +; CHECK-NEXT: .cfi_undefined 3147 +; CHECK-NEXT: .cfi_undefined 3148 +; CHECK-NEXT: .cfi_undefined 3149 +; CHECK-NEXT: .cfi_undefined 3150 +; CHECK-NEXT: .cfi_undefined 3151 +; CHECK-NEXT: .cfi_undefined 3152 +; CHECK-NEXT: .cfi_undefined 3153 +; CHECK-NEXT: .cfi_undefined 3154 +; CHECK-NEXT: .cfi_undefined 3155 +; CHECK-NEXT: .cfi_undefined 3156 +; CHECK-NEXT: .cfi_undefined 3157 +; CHECK-NEXT: .cfi_undefined 3158 +; CHECK-NEXT: .cfi_undefined 3159 +; CHECK-NEXT: .cfi_undefined 3160 +; CHECK-NEXT: .cfi_undefined 3161 +; CHECK-NEXT: .cfi_undefined 3162 +; CHECK-NEXT: .cfi_undefined 3163 +; CHECK-NEXT: .cfi_undefined 3164 +; CHECK-NEXT: .cfi_undefined 3165 +; CHECK-NEXT: .cfi_undefined 3166 +; CHECK-NEXT: .cfi_undefined 3167 +; CHECK-NEXT: .cfi_undefined 3168 +; CHECK-NEXT: .cfi_undefined 3169 +; CHECK-NEXT: .cfi_undefined 3170 +; CHECK-NEXT: .cfi_undefined 3171 +; CHECK-NEXT: .cfi_undefined 3172 +; CHECK-NEXT: .cfi_undefined 3173 +; CHECK-NEXT: .cfi_undefined 3174 +; CHECK-NEXT: .cfi_undefined 3175 +; CHECK-NEXT: .cfi_undefined 3176 +; CHECK-NEXT: .cfi_undefined 3177 +; CHECK-NEXT: .cfi_undefined 3178 +; CHECK-NEXT: .cfi_undefined 3179 +; CHECK-NEXT: .cfi_undefined 3180 +; 
CHECK-NEXT: .cfi_undefined 3181 +; CHECK-NEXT: .cfi_undefined 3182 +; CHECK-NEXT: .cfi_undefined 3183 +; CHECK-NEXT: .cfi_undefined 3184 +; CHECK-NEXT: .cfi_undefined 3185 +; CHECK-NEXT: .cfi_undefined 3186 +; CHECK-NEXT: .cfi_undefined 3187 +; CHECK-NEXT: .cfi_undefined 3188 +; CHECK-NEXT: .cfi_undefined 3189 +; CHECK-NEXT: .cfi_undefined 3190 +; CHECK-NEXT: .cfi_undefined 3191 +; CHECK-NEXT: .cfi_undefined 3192 +; CHECK-NEXT: .cfi_undefined 3193 +; CHECK-NEXT: .cfi_undefined 3194 +; CHECK-NEXT: .cfi_undefined 3195 +; CHECK-NEXT: .cfi_undefined 3196 +; CHECK-NEXT: .cfi_undefined 3197 +; CHECK-NEXT: .cfi_undefined 3198 +; CHECK-NEXT: .cfi_undefined 3199 +; CHECK-NEXT: .cfi_undefined 3200 +; CHECK-NEXT: .cfi_undefined 3201 +; CHECK-NEXT: .cfi_undefined 3202 +; CHECK-NEXT: .cfi_undefined 3203 +; CHECK-NEXT: .cfi_undefined 3204 +; CHECK-NEXT: .cfi_undefined 3205 +; CHECK-NEXT: .cfi_undefined 3206 +; CHECK-NEXT: .cfi_undefined 3207 +; CHECK-NEXT: .cfi_undefined 3208 +; CHECK-NEXT: .cfi_undefined 3209 +; CHECK-NEXT: .cfi_undefined 3210 +; CHECK-NEXT: .cfi_undefined 3211 +; CHECK-NEXT: .cfi_undefined 3212 +; CHECK-NEXT: .cfi_undefined 3213 +; CHECK-NEXT: .cfi_undefined 3214 +; CHECK-NEXT: .cfi_undefined 3215 +; CHECK-NEXT: .cfi_undefined 3216 +; CHECK-NEXT: .cfi_undefined 3217 +; CHECK-NEXT: .cfi_undefined 3218 +; CHECK-NEXT: .cfi_undefined 3219 +; CHECK-NEXT: .cfi_undefined 3220 +; CHECK-NEXT: .cfi_undefined 3221 +; CHECK-NEXT: .cfi_undefined 3222 +; CHECK-NEXT: .cfi_undefined 3223 +; CHECK-NEXT: .cfi_undefined 3224 +; CHECK-NEXT: .cfi_undefined 3225 +; CHECK-NEXT: .cfi_undefined 3226 +; CHECK-NEXT: .cfi_undefined 3227 +; CHECK-NEXT: .cfi_undefined 3228 +; CHECK-NEXT: .cfi_undefined 3229 +; CHECK-NEXT: .cfi_undefined 3230 +; CHECK-NEXT: .cfi_undefined 3231 +; CHECK-NEXT: .cfi_undefined 3232 +; CHECK-NEXT: .cfi_undefined 3233 +; CHECK-NEXT: .cfi_undefined 3234 +; CHECK-NEXT: .cfi_undefined 3235 +; CHECK-NEXT: .cfi_undefined 3236 +; CHECK-NEXT: .cfi_undefined 3237 +; 
CHECK-NEXT: .cfi_undefined 3238 +; CHECK-NEXT: .cfi_undefined 3239 +; CHECK-NEXT: .cfi_undefined 3240 +; CHECK-NEXT: .cfi_undefined 3241 +; CHECK-NEXT: .cfi_undefined 3242 +; CHECK-NEXT: .cfi_undefined 3243 +; CHECK-NEXT: .cfi_undefined 3244 +; CHECK-NEXT: .cfi_undefined 3245 +; CHECK-NEXT: .cfi_undefined 3246 +; CHECK-NEXT: .cfi_undefined 3247 +; CHECK-NEXT: .cfi_undefined 3248 +; CHECK-NEXT: .cfi_undefined 3249 +; CHECK-NEXT: .cfi_undefined 3250 +; CHECK-NEXT: .cfi_undefined 3251 +; CHECK-NEXT: .cfi_undefined 3252 +; CHECK-NEXT: .cfi_undefined 3253 +; CHECK-NEXT: .cfi_undefined 3254 +; CHECK-NEXT: .cfi_undefined 3255 +; CHECK-NEXT: .cfi_undefined 3256 +; CHECK-NEXT: .cfi_undefined 3257 +; CHECK-NEXT: .cfi_undefined 3258 +; CHECK-NEXT: .cfi_undefined 3259 +; CHECK-NEXT: .cfi_undefined 3260 +; CHECK-NEXT: .cfi_undefined 3261 +; CHECK-NEXT: .cfi_undefined 3262 +; CHECK-NEXT: .cfi_undefined 3263 +; CHECK-NEXT: .cfi_undefined 3264 +; CHECK-NEXT: .cfi_undefined 3265 +; CHECK-NEXT: .cfi_undefined 3266 +; CHECK-NEXT: .cfi_undefined 3267 +; CHECK-NEXT: .cfi_undefined 3268 +; CHECK-NEXT: .cfi_undefined 3269 +; CHECK-NEXT: .cfi_undefined 3270 +; CHECK-NEXT: .cfi_undefined 3271 +; CHECK-NEXT: .cfi_undefined 3272 +; CHECK-NEXT: .cfi_undefined 3273 +; CHECK-NEXT: .cfi_undefined 3274 +; CHECK-NEXT: .cfi_undefined 3275 +; CHECK-NEXT: .cfi_undefined 3276 +; CHECK-NEXT: .cfi_undefined 3277 +; CHECK-NEXT: .cfi_undefined 3278 +; CHECK-NEXT: .cfi_undefined 3279 +; CHECK-NEXT: .cfi_undefined 3280 +; CHECK-NEXT: .cfi_undefined 3281 +; CHECK-NEXT: .cfi_undefined 3282 +; CHECK-NEXT: .cfi_undefined 3283 +; CHECK-NEXT: .cfi_undefined 3284 +; CHECK-NEXT: .cfi_undefined 3285 +; CHECK-NEXT: .cfi_undefined 3286 +; CHECK-NEXT: .cfi_undefined 3287 +; CHECK-NEXT: .cfi_undefined 3288 +; CHECK-NEXT: .cfi_undefined 3289 +; CHECK-NEXT: .cfi_undefined 3290 +; CHECK-NEXT: .cfi_undefined 3291 +; CHECK-NEXT: .cfi_undefined 3292 +; CHECK-NEXT: .cfi_undefined 3293 +; CHECK-NEXT: .cfi_undefined 3294 +; 
CHECK-NEXT: .cfi_undefined 3295 +; CHECK-NEXT: .cfi_undefined 3296 +; CHECK-NEXT: .cfi_undefined 3297 +; CHECK-NEXT: .cfi_undefined 3298 +; CHECK-NEXT: .cfi_undefined 3299 +; CHECK-NEXT: .cfi_undefined 3300 +; CHECK-NEXT: .cfi_undefined 3301 +; CHECK-NEXT: .cfi_undefined 3302 +; CHECK-NEXT: .cfi_undefined 3303 +; CHECK-NEXT: .cfi_undefined 3304 +; CHECK-NEXT: .cfi_undefined 3305 +; CHECK-NEXT: .cfi_undefined 3306 +; CHECK-NEXT: .cfi_undefined 3307 +; CHECK-NEXT: .cfi_undefined 3308 +; CHECK-NEXT: .cfi_undefined 3309 +; CHECK-NEXT: .cfi_undefined 3310 +; CHECK-NEXT: .cfi_undefined 3311 +; CHECK-NEXT: .cfi_undefined 3312 +; CHECK-NEXT: .cfi_undefined 3313 +; CHECK-NEXT: .cfi_undefined 3314 +; CHECK-NEXT: .cfi_undefined 3315 +; CHECK-NEXT: .cfi_undefined 3316 +; CHECK-NEXT: .cfi_undefined 3317 +; CHECK-NEXT: .cfi_undefined 3318 +; CHECK-NEXT: .cfi_undefined 3319 +; CHECK-NEXT: .cfi_undefined 3320 +; CHECK-NEXT: .cfi_undefined 3321 +; CHECK-NEXT: .cfi_undefined 3322 +; CHECK-NEXT: .cfi_undefined 3323 +; CHECK-NEXT: .cfi_undefined 3324 +; CHECK-NEXT: .cfi_undefined 3325 +; CHECK-NEXT: .cfi_undefined 3326 +; CHECK-NEXT: .cfi_undefined 3327 +; CHECK-NEXT: .cfi_undefined 32 +; CHECK-NEXT: .cfi_undefined 33 +; CHECK-NEXT: .cfi_undefined 34 +; CHECK-NEXT: .cfi_undefined 35 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 +; CHECK-NEXT: .cfi_undefined 40 +; CHECK-NEXT: .cfi_undefined 41 +; CHECK-NEXT: .cfi_undefined 42 +; CHECK-NEXT: .cfi_undefined 43 +; CHECK-NEXT: .cfi_undefined 44 +; CHECK-NEXT: .cfi_undefined 45 +; CHECK-NEXT: .cfi_undefined 46 +; CHECK-NEXT: .cfi_undefined 47 +; CHECK-NEXT: .cfi_undefined 48 +; CHECK-NEXT: .cfi_undefined 49 +; CHECK-NEXT: .cfi_undefined 50 +; CHECK-NEXT: .cfi_undefined 51 +; CHECK-NEXT: .cfi_undefined 52 +; CHECK-NEXT: .cfi_undefined 53 +; CHECK-NEXT: .cfi_undefined 54 +; CHECK-NEXT: .cfi_undefined 55 +; CHECK-NEXT: .cfi_undefined 56 +; CHECK-NEXT: 
.cfi_undefined 57 +; CHECK-NEXT: .cfi_undefined 58 +; CHECK-NEXT: .cfi_undefined 59 +; CHECK-NEXT: .cfi_undefined 60 +; CHECK-NEXT: .cfi_undefined 61 +; CHECK-NEXT: .cfi_undefined 72 +; CHECK-NEXT: .cfi_undefined 73 +; CHECK-NEXT: .cfi_undefined 74 +; CHECK-NEXT: .cfi_undefined 75 +; CHECK-NEXT: .cfi_undefined 76 +; CHECK-NEXT: .cfi_undefined 77 +; CHECK-NEXT: .cfi_undefined 78 +; CHECK-NEXT: .cfi_undefined 79 +; CHECK-NEXT: .cfi_undefined 88 +; CHECK-NEXT: .cfi_undefined 89 +; CHECK-NEXT: .cfi_undefined 90 +; CHECK-NEXT: .cfi_undefined 91 +; CHECK-NEXT: .cfi_undefined 92 +; CHECK-NEXT: .cfi_undefined 93 +; CHECK-NEXT: .cfi_undefined 94 +; CHECK-NEXT: .cfi_undefined 95 +; CHECK-NEXT: .cfi_undefined 1096 +; CHECK-NEXT: .cfi_undefined 1097 +; CHECK-NEXT: .cfi_undefined 1098 +; CHECK-NEXT: .cfi_undefined 1099 +; CHECK-NEXT: .cfi_undefined 1100 +; CHECK-NEXT: .cfi_undefined 1101 +; CHECK-NEXT: .cfi_undefined 1102 +; CHECK-NEXT: .cfi_undefined 1103 +; CHECK-NEXT: .cfi_undefined 1112 +; CHECK-NEXT: .cfi_undefined 1113 +; CHECK-NEXT: .cfi_undefined 1114 +; CHECK-NEXT: .cfi_undefined 1115 +; CHECK-NEXT: .cfi_undefined 1116 +; CHECK-NEXT: .cfi_undefined 1117 +; CHECK-NEXT: .cfi_undefined 1118 +; CHECK-NEXT: .cfi_undefined 1119 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2601, 256 ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v41, s16, 16 -; CHECK-NEXT: v_writelane_b32 v41, s30, 0 -; CHECK-NEXT: v_writelane_b32 v41, s31, 1 -; CHECK-NEXT: v_writelane_b32 v41, s34, 2 -; CHECK-NEXT: v_writelane_b32 v41, s35, 3 -; CHECK-NEXT: v_writelane_b32 v41, s36, 4 -; CHECK-NEXT: v_writelane_b32 v41, s37, 5 -; CHECK-NEXT: v_writelane_b32 v41, s38, 6 -; CHECK-NEXT: v_writelane_b32 v41, s39, 7 -; CHECK-NEXT: v_writelane_b32 v41, s48, 8 -; 
CHECK-NEXT: v_writelane_b32 v41, s49, 9 -; CHECK-NEXT: v_writelane_b32 v41, s50, 10 -; CHECK-NEXT: v_writelane_b32 v41, s51, 11 -; CHECK-NEXT: v_writelane_b32 v41, s52, 12 +; CHECK-NEXT: .cfi_llvm_vector_registers 65, 2601, 16, 32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v41, s53, 13 -; CHECK-NEXT: v_writelane_b32 v41, s54, 14 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_llvm_vector_offset 2600, 32, 17, 64, 0 +; CHECK-NEXT: v_writelane_b32 v41, s34, 0 +; CHECK-NEXT: .cfi_llvm_vector_registers 66, 2622, 0, 32 +; CHECK-NEXT: v_writelane_b32 v41, s35, 1 +; CHECK-NEXT: .cfi_llvm_vector_registers 67, 2622, 1, 32 +; CHECK-NEXT: v_writelane_b32 v41, s36, 2 +; CHECK-NEXT: .cfi_llvm_vector_registers 68, 2622, 2, 32 +; CHECK-NEXT: v_writelane_b32 v41, s37, 3 +; CHECK-NEXT: .cfi_llvm_vector_registers 69, 2622, 3, 32 +; CHECK-NEXT: v_writelane_b32 v41, s38, 4 +; CHECK-NEXT: .cfi_llvm_vector_registers 70, 2622, 4, 32 +; CHECK-NEXT: v_writelane_b32 v41, s39, 5 +; CHECK-NEXT: .cfi_llvm_vector_registers 71, 2622, 5, 32 +; CHECK-NEXT: v_writelane_b32 v41, s48, 6 +; CHECK-NEXT: .cfi_llvm_vector_registers 80, 2622, 6, 32 +; CHECK-NEXT: v_writelane_b32 v41, s49, 7 +; CHECK-NEXT: .cfi_llvm_vector_registers 81, 2622, 7, 32 +; CHECK-NEXT: v_writelane_b32 v41, s50, 8 +; CHECK-NEXT: .cfi_llvm_vector_registers 82, 2622, 8, 32 +; CHECK-NEXT: v_writelane_b32 v41, s51, 9 +; CHECK-NEXT: .cfi_llvm_vector_registers 83, 2622, 9, 32 +; CHECK-NEXT: v_writelane_b32 v41, s52, 10 +; CHECK-NEXT: .cfi_llvm_vector_registers 84, 2622, 10, 32 +; CHECK-NEXT: v_writelane_b32 v41, s53, 11 +; CHECK-NEXT: .cfi_llvm_vector_registers 85, 2622, 11, 32 +; CHECK-NEXT: v_writelane_b32 v41, s54, 12 +; CHECK-NEXT: .cfi_llvm_vector_registers 86, 2622, 12, 32 +; CHECK-NEXT: v_writelane_b32 v41, s55, 13 +; CHECK-NEXT: .cfi_llvm_vector_registers 87, 2622, 13, 32 +; CHECK-NEXT: v_writelane_b32 v41, s30, 14 
+; CHECK-NEXT: v_writelane_b32 v41, s31, 15 +; CHECK-NEXT: .cfi_llvm_vector_registers 16, 2622, 14, 32, 2622, 15, 32 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef ; CHECK-NEXT: .Ltmp0: @@ -45,10 +530,8 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12 -; CHECK-NEXT: v_writelane_b32 v41, s55, 15 ; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b32 s51, s14 @@ -74,28 +557,29 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s30, v41, 14 ; CHECK-NEXT: flat_store_dword v[0:1], v2 -; CHECK-NEXT: v_readlane_b32 s55, v41, 15 -; CHECK-NEXT: v_readlane_b32 s54, v41, 14 -; CHECK-NEXT: v_readlane_b32 s53, v41, 13 -; CHECK-NEXT: v_readlane_b32 s52, v41, 12 -; CHECK-NEXT: v_readlane_b32 s51, v41, 11 -; CHECK-NEXT: v_readlane_b32 s50, v41, 10 -; CHECK-NEXT: v_readlane_b32 s49, v41, 9 -; CHECK-NEXT: v_readlane_b32 s48, v41, 8 -; CHECK-NEXT: v_readlane_b32 s39, v41, 7 -; CHECK-NEXT: v_readlane_b32 s38, v41, 6 -; CHECK-NEXT: v_readlane_b32 s37, v41, 5 -; CHECK-NEXT: v_readlane_b32 s36, v41, 4 -; CHECK-NEXT: v_readlane_b32 s35, v41, 3 -; CHECK-NEXT: v_readlane_b32 s34, v41, 2 -; CHECK-NEXT: v_readlane_b32 s31, v41, 1 -; CHECK-NEXT: v_readlane_b32 s30, v41, 0 +; CHECK-NEXT: v_readlane_b32 s31, v41, 15 +; CHECK-NEXT: v_readlane_b32 s55, v41, 13 +; CHECK-NEXT: v_readlane_b32 s54, v41, 12 +; CHECK-NEXT: v_readlane_b32 s53, v41, 11 +; CHECK-NEXT: v_readlane_b32 s52, v41, 10 +; 
CHECK-NEXT: v_readlane_b32 s51, v41, 9 +; CHECK-NEXT: v_readlane_b32 s50, v41, 8 +; CHECK-NEXT: v_readlane_b32 s49, v41, 7 +; CHECK-NEXT: v_readlane_b32 s48, v41, 6 +; CHECK-NEXT: v_readlane_b32 s39, v41, 5 +; CHECK-NEXT: v_readlane_b32 s38, v41, 4 +; CHECK-NEXT: v_readlane_b32 s37, v41, 3 +; CHECK-NEXT: v_readlane_b32 s36, v41, 2 +; CHECK-NEXT: v_readlane_b32 s35, v41, 1 +; CHECK-NEXT: v_readlane_b32 s34, v41, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v41, 16 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll index bcccf50e3805c..a3863156b8d34 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll @@ -286,21 +286,20 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe ; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 +; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo -; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 -; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 -; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-TRUE16-NEXT: s_wait_alu 
0xfffe ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; CHECK-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-TRUE16-NEXT: s_mov_b32 s32, s33 ; CHECK-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -326,21 +325,20 @@ define amdgpu_gfx void @amdgpu_gfx() #0 { ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe ; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 +; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo -; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 -; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 -; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; CHECK-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-FAKE16-NEXT: s_mov_b32 s32, s33 ; CHECK-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll index c5db7a33f70e0..ed767aeaf112f 100644 --- a/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -1049,12 +1049,12 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; 
GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1078,12 +1078,12 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1107,13 +1107,14 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: 
v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1136,13 +1137,14 @@ define void @test_dynamic_stackalloc_device_uniform(i32 %n) { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1171,15 +1173,15 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s34 +; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 
s7, s[4:5] @@ -1205,15 +1207,15 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 +; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1239,19 +1241,20 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-SDAG-NEXT: s_mov_b32 s5, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 -; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 ; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; 
GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 ; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 ; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 @@ -1261,31 +1264,33 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0xfff ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 10 ; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s5 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 ; GFX11-GISEL-NEXT: 
s_bitset0_b32 s1, s2 ; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 @@ -1296,12 +1301,13 @@ define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 10 ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 ; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 ; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, i32 %n, align 128, addrspace(5) store volatile i32 10, ptr addrspace(5) %alloca @@ -1312,12 +1318,12 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1341,12 +1347,12 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; 
GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1370,13 +1376,14 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1399,13 +1406,14 @@ define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1434,13 +1442,13 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1464,13 +1472,13 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1494,14 +1502,14 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -1525,14 +1533,14 @@ define void @test_dynamic_stackalloc_device_divergent() { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1566,6 +1574,7 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX9-SDAG-NEXT: s_mov_b32 s10, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s34 +; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 @@ -1575,7 +1584,6 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; 
GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0 -; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GFX9-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[4:5] ; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 @@ -1598,16 +1606,16 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 +; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 ; GFX9-GISEL-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1633,19 +1641,20 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 +; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 ; 
GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo ; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0xfff ; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff000 -; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1657,28 +1666,29 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-SDAG-NEXT: ; %bb.2: ; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s0 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc -; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 ; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s0 dlc ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s6 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f ; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1693,12 +1703,13 @@ define void @test_dynamic_stackalloc_device_divergent_over_aligned() { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 ; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 ; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %idx = call i32 @llvm.amdgcn.workitem.id.x() %alloca = alloca i32, i32 %idx, align 128, addrspace(5) @@ -1710,13 +1721,13 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1740,13 +1751,13 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, 
s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -1770,14 +1781,14 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 ; GFX11-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -1801,14 +1812,14 @@ define void @test_dynamic_stackalloc_device_divergent_under_aligned() { ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; 
GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -1842,10 +1853,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_mov_b32 s13, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-SDAG-NEXT: s_mov_b32 s14, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x3000 ; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-SDAG-NEXT: s_cbranch_execz .LBB14_6 @@ -1925,10 +1936,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX9-GISEL-NEXT: s_mov_b32 s13, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-GISEL-NEXT: s_mov_b32 s14, s34 -; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x3000 ; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-GISEL-NEXT: s_cbranch_execz .LBB14_6 @@ -2007,10 +2018,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_mov_b32 s7, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; 
GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 +; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0xc0 ; GFX11-SDAG-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-SDAG-NEXT: s_cbranch_execz .LBB14_6 @@ -2092,10 +2103,10 @@ define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { ; GFX11-GISEL-NEXT: s_mov_b32 s7, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s8, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0xc0 ; GFX11-GISEL-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX11-GISEL-NEXT: s_cbranch_execz .LBB14_6 @@ -2192,10 +2203,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-SDAG-NEXT: s_mov_b32 s11, s33 ; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-SDAG-NEXT: s_mov_b32 s12, s34 -; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 -; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x2000 ; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -2258,10 +2269,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX9-GISEL-NEXT: s_mov_b32 s11, s33 ; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0 ; GFX9-GISEL-NEXT: s_mov_b32 s12, s34 -; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 -; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 +; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-GISEL-NEXT: s_addk_i32 
s32, 0x2000 ; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] @@ -2324,10 +2335,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 ; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 -; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s0, exec_lo ; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 +; GFX11-SDAG-NEXT: s_mov_b32 s0, exec_lo ; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x80 ; GFX11-SDAG-NEXT: v_cmpx_ne_u32_e32 0, v0 ; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -2393,10 +2404,10 @@ define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) { ; GFX11-GISEL-NEXT: s_mov_b32 s5, s33 ; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s6, s34 -; GFX11-GISEL-NEXT: s_mov_b32 s1, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo ; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63 ; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0 +; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo ; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x80 ; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v0 ; GFX11-GISEL-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -2476,13 +2487,13 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB16_1: ; 
=>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2506,13 +2517,13 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 -; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2536,15 +2547,15 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.h, 0 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v1.l, v0.l -; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v1, 2, 15 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0 ; GFX11-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 @@ -2568,14 +2579,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 ; 
GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 @@ -2605,12 +2616,12 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 +; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec ; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 -; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 ; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 ; GFX9-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2634,12 +2645,12 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: s_mov_b32 s9, 
s33 +; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 ; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 ; GFX9-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] @@ -2663,13 +2674,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 +; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 +; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo ; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 -; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -2692,13 +2704,14 @@ define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 ; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 +; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 +; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 ; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo ; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 -; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 +; 
GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 ; GFX11-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index 49a91e6f6f33b..15ef61fd75bad 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -21,21 +21,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW64: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 12, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -54,21 +66,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 12, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 12, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -88,21 +112,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead 
$scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 12, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7 @@ -121,21 +157,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: 
name: s_add_i32__literal__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 68, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -154,21 +202,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; 
FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__literal - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 68, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 68, implicit-def $scc SI_RETURN implicit $sgpr7 @@ -188,21 +248,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset96 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit 
$scc ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -222,21 +294,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 
5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -258,6 +342,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -266,6 +353,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -274,6 +364,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -281,6 +374,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -304,6 +400,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -312,6 +411,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -320,6 +422,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: 
renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -327,6 +432,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -351,6 +459,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc @@ -359,6 +470,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed 
renamable $sgpr7, 80, implicit-def dead $scc @@ -367,6 +481,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -374,6 +491,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -398,6 +518,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, 
killed renamable $sgpr7, implicit-def $scc @@ -406,6 +529,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc @@ -414,6 +540,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -421,6 +550,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; 
FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -445,6 +577,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -453,17 +587,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined 
$pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -485,6 +625,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc @@ -493,17 +635,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, 
implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -525,6 +673,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -533,17 +683,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -567,6 +723,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc @@ -575,6 +733,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc @@ 
-583,12 +743,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc @@ -613,6 +777,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc @@ -621,6 +787,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc @@ -629,12 +797,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 96, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def dead $scc @@ -658,6 +830,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc @@ -666,6 +840,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 
0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc @@ -674,12 +850,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc @@ -702,6 +882,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc 
@@ -710,6 +893,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc @@ -718,6 +904,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -725,6 +914,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, 
implicit $scc @@ -750,6 +942,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc @@ -758,6 +952,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc @@ -766,12 +962,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + 
; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, 96, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc @@ -795,6 +995,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc @@ -803,6 +1006,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc @@ -811,6 +1017,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -818,6 +1027,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -840,21 +1052,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -874,21 +1098,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW32-NEXT: 
SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -911,6 +1147,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc @@ -919,17 +1157,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: 
s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -952,6 +1196,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc @@ -960,17 +1206,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: 
renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -989,21 +1241,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__0__fi_offset0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__0__fi_offset0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 ; 
MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__0__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__0__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 0, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -1022,21 +1286,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__0 - ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__0 - ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__0 - ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__0 - ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY $sgpr32 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, 0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -1058,6 +1334,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr7 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1066,6 +1345,9 @@ body: | ; MUBUFW32-LABEL: name: 
s_add_i32__same_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr7 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1074,6 +1356,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr7 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -1081,6 +1366,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr7 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -1104,6 +1392,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; 
MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1112,6 +1403,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1120,6 +1414,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 @@ -1127,6 +1424,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 
0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 @@ -1150,6 +1450,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1158,6 +1461,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr7, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 @@ -1166,6 +1472,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 @@ -1173,6 +1482,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 @@ -1198,6 +1510,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1206,6 +1520,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: 
$sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1214,12 +1530,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc @@ -1244,6 +1564,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1252,6 +1574,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 
0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 @@ -1260,12 +1584,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc @@ -1291,6 +1619,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1299,6 +1629,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; MUBUFW32: liveins: $sgpr8, 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1307,12 +1639,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc @@ -1338,6 +1674,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1346,6 +1684,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc @@ -1354,12 +1694,16 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc @@ -1384,6 +1728,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 @@ -1392,6 +1739,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 @@ -1400,6 +1750,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 @@ -1407,6 +1760,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 @@ -1432,6 +1788,9 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc @@ -1440,6 +1799,9 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc @@ -1448,6 +1810,9 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr8 ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 @@ -1455,6 +1820,9 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 @@ -1479,6 +1847,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; MUBUFW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUFW64-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi @@ -1487,6 +1857,8 @@ body: | ; MUBUFW32-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; MUBUFW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, 
$sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUFW32-NEXT: renamable $vcc_hi = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $vcc_hi, renamable $vcc_lo, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi @@ -1495,6 +1867,8 @@ body: | ; FLATSCRW64-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; FLATSCRW64: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; FLATSCRW64-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc ; FLATSCRW64-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, 
implicit $sgpr28, implicit $sgpr29 @@ -1502,6 +1876,8 @@ body: | ; FLATSCRW32-LABEL: name: s_add_i32_use_dst_reg_as_temp_regression ; FLATSCRW32: liveins: $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; FLATSCRW32-NEXT: renamable $vcc_hi = S_ADD_I32 killed $sgpr32, renamable $vcc_lo, implicit-def dead $scc ; FLATSCRW32-NEXT: renamable $vcc_hi = COPY killed renamable $vcc_hi ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29 @@ -1525,24 +1901,36 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; MUBUFW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, 
$sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit 
$sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; MUBUFW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; MUBUFW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit 
$sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; FLATSCRW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, 
$sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, 
implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero ; FLATSCRW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY renamable $sgpr7 ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 renamable $sgpr7 = S_ADD_I32 renamable 
$sgpr7, %stack.0, implicit-def dead $scc @@ -1566,24 +1954,36 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; MUBUFW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit 
$sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; MUBUFW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; MUBUFW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, 
$sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit 
$sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW64-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; FLATSCRW64: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, 
implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 ; ; FLATSCRW32-LABEL: name: s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero ; FLATSCRW32: liveins: $vcc_lo, $vcc_hi, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, 
$sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, 64, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $vcc_lo, implicit $vcc_hi, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31, implicit $sgpr32, implicit $sgpr33, implicit $sgpr34, implicit $sgpr35, implicit $sgpr36, implicit $sgpr37, implicit $sgpr38, implicit $sgpr39, implicit $sgpr40, implicit $sgpr41, implicit $sgpr42, implicit $sgpr43, implicit $sgpr44, implicit $sgpr45, implicit $sgpr46, implicit $sgpr47, implicit $sgpr48, implicit $sgpr49, implicit $sgpr50, implicit $sgpr51, implicit $sgpr52, implicit $sgpr53, implicit $sgpr54, implicit $sgpr55, implicit $sgpr56, 
implicit $sgpr57, implicit $sgpr58, implicit $sgpr59, implicit $sgpr60, implicit $sgpr61, implicit $sgpr62, implicit $sgpr63, implicit $sgpr64, implicit $sgpr65, implicit $sgpr66, implicit $sgpr67, implicit $sgpr68, implicit $sgpr69, implicit $sgpr70, implicit $sgpr71, implicit $sgpr72, implicit $sgpr73, implicit $sgpr74, implicit $sgpr75, implicit $sgpr76, implicit $sgpr77, implicit $sgpr78, implicit $sgpr79, implicit $sgpr80, implicit $sgpr81, implicit $sgpr82, implicit $sgpr83, implicit $sgpr84, implicit $sgpr85, implicit $sgpr86, implicit $sgpr87, implicit $sgpr88, implicit $sgpr89, implicit $sgpr90, implicit $sgpr91, implicit $sgpr92, implicit $sgpr93, implicit $sgpr94, implicit $sgpr95 renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, %stack.1, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir index af61bd70f16b6..442018d21734a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir @@ -58,6 +58,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 @@ -66,17 +68,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: 
frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal - ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -98,6 +106,8 @@ body: | ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc @@ -106,17 +116,23 @@ body: | ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc 
; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc - ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir index 7f370b2cca658..3cfb96fede71a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir @@ -33,6 +33,214 @@ body: | ; GFX8-LABEL: name: s_copy_frame_index_elimination_failure_pei 
; GFX8: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr83 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr147 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr211 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX8-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX8-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX8-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -58,6 +266,214 @@ body: | ; GFX900-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX900: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 
+ ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; 
GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX900-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr75 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX900-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX900-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX900-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -83,6 +499,246 @@ body: | ; GFX90A-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX90A: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr103 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr166 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX90A-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX90A-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc @@ -108,6 +764,214 @@ body: | ; GFX1010-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1010: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr50 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + 
; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr176 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1010-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr231 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr79 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1010-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1010-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1010-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -133,6 +997,214 @@ body: | ; GFX1100-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1100: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr50 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + 
; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr176 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1100-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr231 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr79 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1100-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1100-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1100-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -158,6 +1230,214 @@ body: | ; GFX1200-LABEL: name: s_copy_frame_index_elimination_failure_pei ; GFX1200: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr50 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + 
; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr176 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; GFX1200-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr231 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr79 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX1200-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base ; GFX1200-NEXT: renamable $sgpr17 = S_MOV_B32 0 ; GFX1200-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc_lo @@ -216,6 +1496,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -227,6 +1510,9 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -237,6 +1523,9 @@ body: | ; 
GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -247,6 +1536,9 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1010-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1010-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1010-NEXT: $vgpr0 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec @@ -257,6 +1549,9 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1100-NEXT: $sgpr0 = S_ADDC_U32 $sgpr32, 64, implicit-def $scc, implicit $scc @@ -268,6 +1563,9 @@ body: | ; GFX1200-LABEL: name: 
materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX1200-NEXT: $sgpr0 = S_ADDC_U32 $sgpr32, 64, implicit-def $scc, implicit $scc @@ -300,22 +1598,89 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) 
into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store 
(s32) into %stack.17, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -361,22 +1726,89 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 
32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -421,22 +1853,105 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 
0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -481,22 +1996,89 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1010-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 
implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 1024 ; GFX1010-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 256 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX1010-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 0 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -539,22 +2121,89 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1100-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr4 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, 
$exec_lo, 32, 1024 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 256 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5) + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, 
$exec_lo, 32, 0 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -598,22 +2247,89 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX1200-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1920 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1792 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 1664 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 1536 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 1408 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 1280 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 1152 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 1024 ; GFX1200-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 896 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 768 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 640 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 512 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 384 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 256 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 128 ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5) + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 0 ; GFX1200-NEXT: 
S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -694,6 +2410,54 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -721,6 +2485,54 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, 
$sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -747,6 +2559,54 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -773,6 +2633,54 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr63 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -799,6 +2707,54 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -826,6 +2782,54 @@ body: | ; GFX1200-LABEL: name: 
materialize_fi_s_mov_b32_offset_24_live_scc_live_vcc_no_sgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + 
; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -889,6 +2893,54 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -916,6 +2968,54 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 
+ ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr60 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -942,6 +3042,54 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ 
-968,6 +3116,54 @@ body: | ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1010-NEXT: {{ $}} + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr25 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1010-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1010-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -994,6 +3190,54 @@ body: | ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr0 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; 
GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1100-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -1021,6 +3265,54 @@ body: | ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc_no_sgpr ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0 ; GFX1200-NEXT: {{ $}} + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + 
; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX1200-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX1200-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir index aecff1b13171d..48f1ab0ee3c30 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-scalar-bit-ops.mir @@ -21,21 +21,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 12, killed $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 12, killed $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_or_b32__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 12, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_OR_B32 12, %stack.0, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -55,24 +67,36 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_or_b32__literal__fi_offset96 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW64-NEXT: $sgpr4 = 
S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_or_b32__literal__fi_offset96 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_or_b32__literal__fi_offset96 - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_or_b32__literal__fi_offset96 - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def 
$scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 68, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_OR_B32 68, %stack.1, implicit-def $scc @@ -96,6 +120,9 @@ body: | ; MUBUFW64-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -104,6 +131,9 @@ body: | ; MUBUFW32-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -112,6 +142,9 @@ body: | ; FLATSCRW64-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, 
implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -119,6 +152,9 @@ body: | ; FLATSCRW32-LABEL: name: s_or_b32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -143,6 +179,9 @@ body: | ; MUBUFW64-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -151,6 +190,9 @@ body: | ; MUBUFW32-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -159,6 +201,9 @@ body: | ; FLATSCRW64-LABEL: 
name: s_or_b32__sgpr__fi_inlineimm_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -166,6 +211,9 @@ body: | ; FLATSCRW32-LABEL: name: s_or_b32__sgpr__fi_inlineimm_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_OR_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -190,6 +238,9 @@ body: | ; MUBUFW64-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -198,6 +249,9 @@ body: | ; MUBUFW32-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: 
{{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc @@ -206,6 +260,9 @@ body: | ; FLATSCRW64-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW64-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc @@ -213,6 +270,9 @@ body: | ; FLATSCRW32-LABEL: name: s_and_b32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc ; FLATSCRW32-NEXT: renamable $sgpr7 = S_AND_B32 $sgpr8, killed $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir index 348743644ce4f..fd296666514ad 100644 --- 
a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir @@ -16,11 +16,17 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0, implicit $sgpr0 @@ -39,12 +45,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc - ; MUBUFW32: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc - ; FLATSCRW32: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr0 renamable $vgpr0, $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec @@ -64,12 +76,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, 
implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -89,12 +107,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; MUBUFW32: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; FLATSCRW32: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec 
; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -118,6 +142,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -125,6 +152,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -149,6 +179,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -156,6 +189,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -181,6 +217,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -188,6 +227,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -213,6 +255,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -220,6 +265,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ 
-245,6 +293,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -252,6 +303,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc_lo @@ -274,11 +328,15 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW32-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; MUBUFW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; MUBUFW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: 
v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; FLATSCRW32: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc_lo = V_ADD_CO_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -302,6 +360,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; MUBUFW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; MUBUFW32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -310,6 +371,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; FLATSCRW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 
32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -337,12 +401,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -366,12 +434,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, renamable 
$sgpr8 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8 = V_ADD_CO_U32_e64 %stack.0, renamable $vgpr1, 0, implicit $exec @@ -396,11 +468,15 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -424,11 +500,15 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; 
FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -453,12 +533,16 @@ body: | ; MUBUFW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0, dead renamable $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc_lo = V_ADD_CO_U32_e64 $vgpr0, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index ade7b4266e9e6..95d9f226c4634 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -22,12 +22,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec @@ -47,13 +53,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, 
implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -75,12 +87,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable 
$vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec @@ -101,13 +119,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 28, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -128,12 +152,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.0, implicit-def dead $vcc, implicit $exec @@ -153,13 +183,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 
$sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -181,12 +217,18 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit 
$vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.1, implicit-def dead $vcc, implicit $exec @@ -207,13 +249,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 100, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -237,6 +285,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_offset0 ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -244,6 +295,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec @@ -266,6 +320,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__vgpr ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -273,6 +330,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr1, implicit-def dead $vcc, implicit $exec @@ -296,6 +356,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -304,6 +367,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -328,6 +394,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr ; MUBUFW64: liveins: $vgpr1 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -336,6 +405,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -360,6 +432,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -368,6 +443,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ 
$}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec @@ -389,13 +467,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = 
V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, %stack.0, 0, implicit $exec @@ -415,13 +499,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp - ; FLATSCRW64: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -441,13 +531,19 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; MUBUFW64: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUFW64: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp - ; FLATSCRW64: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec @@ -471,6 +567,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX7: liveins: $sgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ 
-480,6 +579,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX8: liveins: $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -489,6 +591,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX900: liveins: $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -498,6 +603,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX90A: liveins: $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -507,6 +615,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX10: liveins: $sgpr8 ; GFX10-NEXT: {{ $}} + ; 
GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -515,6 +626,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX942: liveins: $sgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -524,6 +638,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX11: liveins: $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -531,6 +648,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr ; GFX12: liveins: $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -555,6 +675,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX7: liveins: $sgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -564,6 +687,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX8: liveins: $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -573,6 +699,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX900: liveins: $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -582,6 +711,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX90A: liveins: $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -591,6 +723,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX10: liveins: $sgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -599,6 +734,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX942: liveins: $sgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -608,6 +746,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX11: liveins: $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -615,6 +756,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp ; GFX12: liveins: $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -640,6 +784,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -649,6 +796,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -658,6 +808,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -667,6 +820,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, 
implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -676,6 +832,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -684,6 +843,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -692,6 +854,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable 
$vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 @@ -699,6 +864,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 @@ -724,6 +892,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -733,6 +904,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, 
dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -742,6 +916,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -751,6 +928,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -760,6 +940,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: 
renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -768,6 +951,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -776,6 +962,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 @@ -783,6 +972,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit 
$exec ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 @@ -808,6 +1000,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX7: liveins: $vgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -817,6 +1012,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX8: liveins: $vgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -826,6 +1024,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX900: liveins: $vgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $vgpr1 = 
V_MOV_B32_e32 128, implicit $exec @@ -835,6 +1036,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX90A: liveins: $vgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -844,6 +1048,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX10: liveins: $vgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -852,6 +1059,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX942: liveins: $vgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 
killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -860,6 +1070,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX11: liveins: $vgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -867,6 +1080,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc ; GFX12: liveins: $vgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc @@ -890,13 +1106,17 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: 
renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -918,6 +1138,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec @@ -925,7 +1147,9 @@ body: | ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def $vcc, implicit $exec @@ -949,13 +1173,17 @@ body: | ; 
MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 84, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -978,6 +1206,8 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX7: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -987,6 +1217,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -996,6 +1228,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX900: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1005,6 +1239,8 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1014,22 +1250,30 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr0 = S_ADD_U32 
$sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX942: $sgpr4 = S_MOV_B32 72 + ; GFX942: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX942-NEXT: $sgpr4 = S_MOV_B32 72 ; GFX942-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX11: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; GFX11: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX11-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp - ; GFX12: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec + ; GFX12: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; GFX12-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 84, 0, 1, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -1053,6 +1297,9 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1065,6 +1312,9 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1077,6 +1327,9 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 
$sgpr8, killed $vgpr1, 0, implicit $exec @@ -1089,6 +1342,9 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1101,6 +1357,9 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1112,6 +1371,9 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec @@ -1124,6 +1386,9 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX11-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1132,6 +1397,9 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = 
V_ADD_CO_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec ; GFX12-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1160,6 +1428,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1171,6 +1442,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = 
V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1202,6 +1476,9 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec @@ -1213,6 +1490,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr8, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255 @@ -1240,6 +1520,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1248,6 +1530,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_CO_U32_e32 renamable $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec @@ -1271,6 +1555,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec @@ -1279,6 +1565,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -1302,6 +1590,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec @@ -1310,6 +1600,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 %stack.0, renamable $vgpr1, 0, implicit $exec @@ -1334,6 +1626,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, 
implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1341,6 +1635,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1363,6 +1659,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1370,6 +1668,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1393,6 +1693,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; 
MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1400,6 +1702,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1423,6 +1727,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1430,6 +1736,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN 
implicit $vgpr0 @@ -1452,6 +1760,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1459,6 +1769,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec SI_RETURN implicit $vgpr0 @@ -1481,6 +1793,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec @@ -1489,6 +1803,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; 
FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def $vcc, implicit $exec @@ -1514,6 +1830,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1522,6 +1840,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec @@ -1548,6 +1868,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1556,6 +1878,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec @@ -1581,6 +1905,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1589,6 +1915,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, 
implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1614,6 +1942,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1622,6 +1952,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec @@ -1647,6 +1979,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable 
$vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1655,6 +1989,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.1, 0, implicit $exec @@ -1679,6 +2015,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel ; MUBUFW64: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec @@ -1687,6 +2025,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel ; FLATSCRW64: liveins: $sgpr4 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1710,6 +2050,11 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; MUBUFW64: liveins: $sgpr4 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1717,6 +2062,11 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX942: liveins: $sgpr4 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec ; GFX942-NEXT: SI_RETURN implicit $vgpr0 @@ -1724,12 +2074,22 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX11: liveins: $sgpr4 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $sgpr4, 0, implicit $exec ; 
GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64__fi_sgpr_func ; GFX12: liveins: $sgpr4 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $sgpr4, 0, implicit $exec ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1754,6 +2114,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_kernel ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1761,6 +2123,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1783,6 +2147,11 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_func ; 
MUBUFW64: liveins: $vgpr0 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $vgpr0, killed $vgpr1, 0, implicit $exec ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0 @@ -1790,6 +2159,11 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_inc_same_vgpr_func ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 $sgpr32, killed $vgpr0, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, dead renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $vgpr0, 0, implicit $exec @@ -1814,6 +2188,8 @@ body: | ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel_live_co ; MUBUFW64: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = 
S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 0, killed $sgpr4, 0, implicit $exec @@ -1822,6 +2198,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_sgpr_kernel_live_co ; FLATSCRW64: liveins: $sgpr4 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 0, killed $sgpr4, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr4_sgpr5 renamable $vgpr0, renamable $sgpr4_sgpr5 = V_ADD_CO_U32_e64 %stack.0, killed $sgpr4, 0, implicit $exec @@ -1846,6 +2224,8 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1858,6 +2238,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def 
$sgpr8_sgpr9_sgpr10_sgpr11 ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1870,6 +2252,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1882,6 +2266,8 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1894,6 +2280,8 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr96 = 
S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -1905,6 +2293,8 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec @@ -1914,6 +2304,8 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -1922,6 +2314,8 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64_fi_sgpr_clobbered_register ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, 
dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -1950,6 +2344,8 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX7: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX7-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX7-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX7-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1962,6 +2358,8 @@ body: | ; GFX8-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX8: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX8-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX8-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1974,6 +2372,8 @@ body: | ; GFX900-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX900: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX900-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 
= COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX900-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX900-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1986,6 +2386,8 @@ body: | ; GFX90A-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX90A: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $noreg, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 ; GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 @@ -1998,6 +2400,8 @@ body: | ; GFX10-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX10: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr96_sgpr97_sgpr98_sgpr99 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $noreg, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -2009,6 +2413,8 @@ body: | ; GFX942-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX942: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
GFX942-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 32772, implicit $exec ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $sgpr0, 0, implicit $exec @@ -2018,6 +2424,8 @@ body: | ; GFX11-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX11: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32772, killed $sgpr0, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -2026,6 +2434,8 @@ body: | ; GFX12-LABEL: name: v_add_co_u32_e64_sgpr_fi_clobbered_register ; GFX12: liveins: $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7:0x000000000000003C ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX12-NEXT: renamable $sgpr0 = S_LSHL_B32 renamable $sgpr6, 2, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 32768, killed $sgpr0, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed renamable $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, 0, 0, 0, 0, implicit $exec :: (load (s32), addrspace 5) @@ -2054,11 +2464,19 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2066,17 +2484,26 @@ body: | ; GFX7-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: 
$sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2084,102 +2511,157 @@ body: | ; GFX8-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 
2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; 
GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 
; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register 
$sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2204,11 +2686,19 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = 
S_MOV_B32 12288 @@ -2216,17 +2706,26 @@ body: | ; GFX7-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2234,105 +2733,160 @@ body: | ; GFX8-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; 
GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION 
register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; 
GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 
32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX12-NEXT: 
$sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8_sgpr9 renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2357,11 +2911,17 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2369,17 +2929,24 @@ body: | ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX8: liveins: $sgpr4, 
$sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 12288 @@ -2387,102 +2954,145 @@ body: | ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed 
$sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, 
killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def 
$scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = 
V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec @@ -2507,11 +3117,17 @@ body: | ; GFX7-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX7: liveins: $sgpr4, $sgpr5 ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX7-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX7-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX7-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX7-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX7-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX7-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX7-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX7-NEXT: $sgpr6 = S_MOV_B32 12288 @@ -2519,17 +3135,24 @@ body: | ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX7-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX7-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX7-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX7-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX8-LABEL: name: 
v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX8-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $sgpr6 = S_MOV_B32 12288 @@ -2537,105 +3160,148 @@ body: | ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX8-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX8-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX900-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX900-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX900-NEXT: $sgpr33 = frame-setup S_ADD_I32 
$sgpr32, 524224, implicit-def $scc ; GFX900-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX900-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX900-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX900-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX900-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX900-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX900-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX900-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX90A-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX90A-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX90A-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX90A-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX90A-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX90A-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX90A-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; 
GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $vgpr1, 0, implicit $exec ; GFX90A-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX90A-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX90A-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX90A-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX10-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX10: liveins: $sgpr4, $sgpr5 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX10-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX10-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX10-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX10-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX10-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX10-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX10-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX10-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX10-NEXT: $sgpr33 = 
frame-destroy COPY $sgpr4 ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX942-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX942: liveins: $sgpr4, $sgpr5 ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX942-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX942-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX942-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX942-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX942-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX942-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX942-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, killed $sgpr6, 0, implicit $exec ; GFX942-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX942-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX942-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX942-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX942-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX11-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX11: liveins: $sgpr4, $sgpr5 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 
32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX11-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX11-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX11-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX11-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc ; ; GFX12-LABEL: name: v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live ; GFX12: liveins: $sgpr4, $sgpr5 ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX12-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; GFX12-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX12-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; 
GFX12-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX12-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; GFX12-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 64, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir index 6a4671058dc0e..3b1ad0cf28e58 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir @@ -18,22 +18,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec 
+ ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0 - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.0, implicit $exec @@ -54,22 +66,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm___fi_offset_inline_imm - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 28, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.1, implicit $exec @@ -89,22 +113,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: 
renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__literal__fi_offset0 - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 68, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 68, %stack.0, implicit $exec @@ -125,22 +161,34 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; 
MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__literal__fi_offset0__offset_inlineimm - ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 100, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 68, %stack.1, implicit $exec @@ -163,6 +211,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -170,6 +221,9 @@ body: | ; MUBUFW32-LABEL: name: 
v_add_u32_e32__vgpr__fi_offset0 ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -177,12 +231,18 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__vgpr__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, %stack.0, implicit $exec @@ -205,6 +265,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -212,6 +275,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -219,12 +285,18 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__vgpr ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 
%stack.0, $vgpr1, implicit $exec @@ -248,6 +320,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -256,6 +331,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -264,6 +342,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -271,6 +352,9 @@ body: | ; FLATSCRW32-LABEL: name: 
v_add_u32_e32__vgpr__fi_literal_offset ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -295,6 +379,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; MUBUF: liveins: $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -303,6 +390,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; MUBUFW32: liveins: $vgpr1 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr2 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -311,6 +401,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: 
{{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -318,6 +411,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_literal_offset__vgpr ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr32, $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -342,6 +438,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; MUBUF: liveins: $sgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -350,6 +449,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 
0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -358,6 +460,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -366,6 +471,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec @@ -387,21 +495,33 @@ machineFunctionInfo: body: | bb.0: ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; MUBUF: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; MUBUF: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; MUBUFW32: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec + ; MUBUFW32: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0 - ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + 
; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, 12, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -424,6 +544,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; MUBUF: liveins: $sgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -433,6 +556,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 0, implicit $exec @@ -441,6 +567,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 
= V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -450,6 +579,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $sgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -474,6 +606,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -483,6 +618,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 
128, 0, implicit $exec @@ -491,6 +629,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec @@ -499,6 +640,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -523,6 +667,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -532,6 +679,9 @@ body: | ; 
MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 1, implicit $exec @@ -540,6 +690,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -548,6 +701,9 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr0, 128, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -572,6 
+728,9 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUF: liveins: $vgpr8 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -581,6 +740,9 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; MUBUFW32: liveins: $vgpr8 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 1, implicit $exec @@ -589,6 +751,9 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW64: liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, killed $vgpr0, 1, implicit $exec @@ -597,6 +762,9 @@ body: | ; FLATSCRW32-LABEL: name: 
v_add_u32_e64__fi_literal_offset__vgpr__clamp ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr32, $vgpr8, 0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, killed $vgpr0, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 @@ -622,6 +790,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; MUBUF: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec @@ -631,6 +801,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; MUBUFW32: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec @@ -639,6 +811,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; FLATSCRW64: 
liveins: $vgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 @@ -646,6 +820,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel ; FLATSCRW32: liveins: $vgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 %stack.1, $vgpr8, 1, implicit $exec @@ -668,6 +844,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec @@ -676,17 +854,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 
0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.0, implicit $exec SI_RETURN implicit $vgpr0 @@ -708,6 +892,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec @@ -716,17 +902,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 12, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -751,6 +943,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -759,17 +953,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel ; 
MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 12, %stack.1, implicit $exec SI_RETURN implicit $vgpr0 @@ -792,6 +992,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -800,17 +1002,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -833,6 +1041,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; 
MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec @@ -841,17 +1051,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel - ; FLATSCRW64: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel - ; FLATSCRW32: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 44, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 %stack.1, 12, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -874,6 +1090,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp ; MUBUF: 
liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec @@ -882,17 +1100,23 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp - ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec + ; FLATSCRW64: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp - ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit $exec + ; FLATSCRW32: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 44, 0, 1, implicit 
$exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 1, implicit $exec SI_RETURN implicit $vgpr0 @@ -917,6 +1141,8 @@ body: | ; MUBUF-LABEL: name: killed_reg_regression ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec @@ -930,6 +1156,8 @@ body: | ; MUBUFW32-LABEL: name: killed_reg_regression ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec @@ -943,6 +1171,8 @@ body: | ; FLATSCRW64-LABEL: name: killed_reg_regression ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec @@ -954,6 +1184,8 @@ body: | ; FLATSCRW32-LABEL: name: killed_reg_regression ; FLATSCRW32: liveins: 
$vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec @@ -987,6 +1219,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -995,6 +1229,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1003,12 +1239,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e32 renamable $vgpr1, %stack.0, implicit $exec @@ -1032,6 +1272,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1040,6 +1282,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1048,12 +1292,16 @@ body: | ; 
FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0 ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, renamable $vgpr1, implicit $exec @@ -1077,6 +1325,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec @@ -1085,6 +1335,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec @@ -1093,12 +1345,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.0, implicit $exec @@ -1122,6 +1378,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1130,6 +1388,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} 
+ ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec @@ -1138,12 +1398,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW64: liveins: $vgpr1 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after ; FLATSCRW32: liveins: $vgpr1 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 renamable $vgpr0 = V_ADD_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec @@ -1168,6 +1432,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; 
MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1177,6 +1443,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1186,6 +1454,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1193,6 +1463,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1218,6 +1490,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 
0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec @@ -1227,6 +1501,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec @@ -1235,6 +1511,8 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8 @@ -1242,6 +1520,8 @@ body: | ; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, 
implicit $sgpr8 renamable $vgpr0 = V_ADD_U32_e64 renamable $sgpr8, %stack.1, 0, implicit $exec @@ -1266,6 +1546,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1273,6 +1555,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1280,11 +1564,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 
renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1307,6 +1595,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1314,6 +1604,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1321,11 +1613,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 
renamable $vgpr0 = V_ADD_U32_e32 %stack.0, $vgpr0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1349,6 +1645,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1356,6 +1654,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1363,11 +1663,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 
renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1391,6 +1695,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1398,6 +1704,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1405,11 +1713,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 
renamable $vgpr0 = V_ADD_U32_e64 %stack.0, $vgpr0, 0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1432,6 +1744,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 @@ -1439,6 +1753,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 @@ -1446,11 +1762,15 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: 
SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.0, killed $vgpr0, implicit $exec SI_RETURN implicit $vgpr0 @@ -1475,6 +1795,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1483,6 +1805,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1491,12 +1815,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec @@ -1523,6 +1851,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1531,6 +1861,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1539,12 +1871,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; 
FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec @@ -1570,6 +1906,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1578,6 +1916,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec @@ -1586,12 +1926,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec @@ -1617,6 +1961,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec @@ -1625,6 +1971,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit 
$exec @@ -1633,12 +1981,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec @@ -1664,6 +2016,8 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1672,6 +2026,8 @@ body: | ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def 
$sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec @@ -1680,12 +2036,16 @@ body: | ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW64: liveins: $vgpr0 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel ; FLATSCRW32: liveins: $vgpr0 ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; FLATSCRW32-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.1, 0, implicit $exec @@ -1710,49 +2070,70 @@ body: | ; MUBUF-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; MUBUF: liveins: $sgpr4, $sgpr5 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; MUBUF-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; MUBUF-NEXT: $sgpr34 = frame-setup 
COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 12288, killed $vgpr1, implicit $exec ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $vgpr1, 0, implicit $exec ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; MUBUF-NEXT: SI_RETURN implicit $vgpr0 ; ; MUBUFW32-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; MUBUFW32: liveins: $sgpr4, $sgpr5 ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUFW32-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262112, implicit-def $scc ; MUBUFW32-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc ; MUBUFW32-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; MUBUFW32-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUFW32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUFW32-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1048576, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr33, implicit $exec ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12352, killed $vgpr1, 0, implicit $exec ; MUBUFW32-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUFW32-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; MUBUFW32-NEXT: frame-destroy CFI_INSTRUCTION 
def_cfa_register $sgpr32 ; MUBUFW32-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_u32_e64_imm_fi_vop3_literal_error ; FLATSCRW64: liveins: $sgpr4, $sgpr5 ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; FLATSCRW64-NEXT: $sgpr4 = frame-setup COPY $sgpr33 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr4 ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCRW64-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; FLATSCRW64-NEXT: $sgpr5 = frame-setup COPY $sgpr34 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr5 ; FLATSCRW64-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCRW64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCRW64-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCRW64-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 12288, implicit-def $scc ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 64, killed $sgpr6, 0, implicit $exec ; FLATSCRW64-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCRW64-NEXT: $sgpr34 = frame-destroy COPY $sgpr5 + ; FLATSCRW64-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCRW64-NEXT: $sgpr33 = frame-destroy COPY $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0 renamable $vgpr0 = V_ADD_U32_e64 64, %stack.1, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir new file mode 100644 index 0000000000000..dd2503502211f --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=prologepilog -o - %s | FileCheck %s + +--- | + + define protected amdgpu_kernel void @kern1() { + entry: + ret void + } +... +--- +name: kern1 +alignment: 1 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + isEntryFunction: true + scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + workGroupIDX: { reg: '$sgpr0' } + privateSegmentWaveByteOffset: { reg: '$sgpr1' } + workItemIDX: { reg: '$vgpr0' } +body: | + bb.0: + ; CHECK-LABEL: name: kern1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: S_ENDPGM 0 + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll index 76a2114a000cf..f5832e6f307fd 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-frame-reg-in-custom-csr-spills.ll @@ -22,13 +22,14 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: v_writelane_b32 v42, s34, 3 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x3000 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v42, s30, 0 +; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded 
Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v42, s30, 0 ; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:92 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:88 @@ -46,7 +47,6 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, v8 -; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -55,8 +55,8 @@ define void @test_stack_realign(<8 x i32> %val, i32 %idx) #0 { ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: v_readlane_b32 s31, v42, 1 ; GCN-NEXT: v_readlane_b32 s30, v42, 0 +; GCN-NEXT: v_readlane_b32 s31, v42, 1 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s4, v42, 2 ; GCN-NEXT: v_readlane_b32 s34, v42, 3 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir index 17ec6f5b37241..e861a15981186 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir @@ -21,6 +21,8 @@ body: | ; GFX11-LABEL: name: tied_operand_test ; GFX11: liveins: $sgpr0_sgpr1 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec ; GFX11-NEXT: renamable $vgpr0 = SCRATCH_LOAD_SHORT_D16_HI_ST 0, 0, killed renamable $vgpr0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr0_sgpr1, 4, 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index 81bd8baaa0e5d..5c14af9673d1e 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -21,6 +21,9 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_divergent_i32 ; GCN: liveins: $vgpr31, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -55,6 +58,10 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 $sgpr0, 4, implicit-def dead $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec @@ -91,6 +98,12 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr6 ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr0, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc @@ -132,6 +145,10 @@ body: | ; GCN-LABEL: name: func_other_fi_user_non_inline_imm_offset_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 7, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec ; GCN-NEXT: $sgpr5 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc @@ -168,6 +185,12 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32 ; GCN: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GCN-NEXT: $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec @@ -204,6 +227,9 @@ body: | ; GCN-LABEL: name: func_frame_idx_at_the_end_of_bb ; GCN: liveins: $vgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -223,7 +249,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.0 S_ENDPGM 0, implicit $sgpr4 @@ -244,6 +273,9 @@ body: | ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc ; GCN: liveins: $sgpr4, $sgpr5 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GCN-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GCN-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec @@ -266,7 +298,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_64_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 64, 
implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.1 @@ -285,7 +320,10 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: materialize_fi_s_mov_b32_offset_68_dead_scc - ; GCN: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc + ; GCN: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr4 renamable $sgpr4 = S_MOV_B32 %stack.1 @@ -308,6 +346,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64 @@ -318,6 +359,9 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec @@ -327,6 +371,9 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec @@ -353,6 +400,9 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX8: liveins: $sgpr4, $sgpr5 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68 @@ -363,6 +413,9 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX900: liveins: $sgpr4, $sgpr5 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec @@ -372,6 +425,9 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_68_live_scc ; GFX90A: liveins: $sgpr4, $sgpr5 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, 
$sgpr5, implicit-def $scc ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 68, killed $vgpr0, implicit $exec @@ -401,22 +457,81 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -461,22 +576,81 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into 
%stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 
implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -520,22 +694,97 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec + ; 
GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + 
; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -615,22 +864,89 @@ body: | ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 
+ ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; 
GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + 
; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1792 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8-NEXT: S_NOP 0, implicit-def 
$vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -675,22 +991,89 @@ body: | ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX900-NEXT: {{ $}} + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr21 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 3840 ; GFX900-NEXT: 
BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 3584 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 3328 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 3072 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 2816 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 2560 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 2304 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 2048 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 
32, $exec, 64, 1792 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1536 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1280 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1024 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 768 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 512 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 256 ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5) + ; GFX900-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec, 64, 0 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX900-NEXT: S_NOP 0, 
implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 @@ -734,22 +1117,105 @@ body: | ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs ; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr20 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 ; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr40, $agpr0, 32, $exec, 64 ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr41, $agpr1, 32, $exec, 64 ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr42, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr42, $agpr2, 32, $exec, 64 ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr43, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr43, $agpr3, 32, $exec, 64 ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr44, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr44, $agpr4, 32, $exec, 64 ; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr45, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr45, $agpr5, 32, $exec, 64 ; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr46, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_register_mask $vgpr46, $agpr6, 32, $exec, 64 ; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr47, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr47, $agpr7, 32, $exec, 64 ; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr56, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr56, $agpr8, 32, $exec, 64 ; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr57, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr57, $agpr9, 32, $exec, 64 ; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr58, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr58, $agpr10, 32, $exec, 64 ; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr59, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr59, $agpr11, 32, $exec, 64 ; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr60, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr60, $agpr12, 32, $exec, 64 ; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr61, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr61, $agpr13, 32, $exec, 64 ; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr62, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr62, $agpr14, 32, $exec, 64 ; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_register_mask $vgpr63, $agpr15, 32, $exec, 64 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 diff --git 
a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index 2e88da142bb41..2760c7a2187b4 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -16,18 +16,18 @@ define void @callee_with_stack_and_call() #0 { ; SPILL-TO-VGPR-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s4, 2 -; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 ; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s30, 0 +; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x400 +; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 ; SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5] ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; SPILL-TO-VGPR-NEXT: v_writelane_b32 v40, s31, 1 ; SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 +; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 ; SPILL-TO-VGPR-NEXT: s_mov_b32 s32, s33 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 2 ; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -46,21 +46,14 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s30, 0 +; 
NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; NO-SPILL-TO-VGPR-NEXT: s_getpc_b64 s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -69,20 +62,12 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v0, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: 
s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v0, 0 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v0, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index 831d10480c51c..2a27263e16548 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -1727,21 +1727,21 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-NEXT: v_writelane_b32 v40, s19, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_getpc_b64 s[20:21] ; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s19, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 ; GFX9-NEXT: s_mov_b32 s0, s16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -1759,19 +1759,18 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr ; GFX11-NEXT: s_or_saveexec_b32 s16, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 
exec_lo, s16 +; GFX11-NEXT: v_writelane_b32 v40, s3, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[16:17] ; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 -; GFX11-NEXT: v_writelane_b32 v40, s3, 2 ; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2132,21 +2131,24 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[40:41] +; GFX9-NEXT: v_writelane_b32 v40, s29, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v2, s28 ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 ; GFX9-NEXT: v_mov_b32_e32 v5, s27 ; GFX9-NEXT: v_mov_b32_e32 v4, s26 ; GFX9-NEXT: v_mov_b32_e32 v3, s25 ; GFX9-NEXT: v_mov_b32_e32 v2, s24 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32 -; GFX9-NEXT: v_writelane_b32 v40, s29, 2 +; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: v_mov_b32_e32 v5, s23 ; GFX9-NEXT: v_mov_b32_e32 v4, s22 ; GFX9-NEXT: v_mov_b32_e32 v3, s21 ; GFX9-NEXT: v_mov_b32_e32 v2, s20 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; 
GFX9-NEXT: s_nop 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s17 ; GFX9-NEXT: v_mov_b32_e32 v2, s16 ; GFX9-NEXT: s_getpc_b64 s[16:17] @@ -2155,12 +2157,11 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v5, s19 ; GFX9-NEXT: v_mov_b32_e32 v4, s18 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -2178,7 +2179,10 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: s_or_saveexec_b32 s26, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s26 +; GFX11-NEXT: v_writelane_b32 v40, s25, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21 ; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19 ; GFX11-NEXT: s_getpc_b64 s[20:21] @@ -2187,22 +2191,18 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17 ; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3 ; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0 -; GFX11-NEXT: v_writelane_b32 v40, s25, 2 ; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23 ; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v10, s0 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48 ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off 
offset:32 ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16 ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index 9d137fb4101e4..031f25bec26fe 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -13,6 +13,7 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: s_or_saveexec_b64 s[34:35], -1 ; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; SDAG-NEXT: s_mov_b64 exec, s[34:35] +; SDAG-NEXT: s_addk_i32 s32, 0x400 ; SDAG-NEXT: v_writelane_b32 v40, s4, 0 ; SDAG-NEXT: v_writelane_b32 v40, s5, 1 ; SDAG-NEXT: v_writelane_b32 v40, s6, 2 @@ -39,47 +40,46 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_writelane_b32 v40, s27, 23 ; SDAG-NEXT: v_writelane_b32 v40, s28, 24 ; SDAG-NEXT: v_writelane_b32 v40, s29, 25 -; SDAG-NEXT: v_writelane_b32 v40, s30, 26 -; SDAG-NEXT: v_writelane_b32 v40, s31, 27 -; SDAG-NEXT: v_writelane_b32 v40, s72, 28 -; SDAG-NEXT: v_writelane_b32 v40, s73, 29 -; SDAG-NEXT: v_writelane_b32 v40, s74, 30 -; SDAG-NEXT: v_writelane_b32 v40, s75, 31 -; SDAG-NEXT: v_writelane_b32 v40, s76, 32 -; SDAG-NEXT: v_writelane_b32 v40, s77, 33 -; SDAG-NEXT: v_writelane_b32 v40, s78, 34 -; SDAG-NEXT: v_writelane_b32 v40, s79, 35 -; SDAG-NEXT: v_writelane_b32 v40, s88, 36 -; SDAG-NEXT: v_writelane_b32 v40, s89, 37 -; SDAG-NEXT: v_writelane_b32 v40, s90, 38 -; SDAG-NEXT: v_writelane_b32 v40, s91, 39 -; SDAG-NEXT: v_writelane_b32 v40, s92, 40 
-; SDAG-NEXT: v_writelane_b32 v40, s93, 41 -; SDAG-NEXT: v_writelane_b32 v40, s94, 42 +; SDAG-NEXT: v_writelane_b32 v40, s72, 26 +; SDAG-NEXT: v_writelane_b32 v40, s73, 27 +; SDAG-NEXT: v_writelane_b32 v40, s74, 28 +; SDAG-NEXT: v_writelane_b32 v40, s75, 29 +; SDAG-NEXT: v_writelane_b32 v40, s76, 30 +; SDAG-NEXT: v_writelane_b32 v40, s77, 31 +; SDAG-NEXT: v_writelane_b32 v40, s78, 32 +; SDAG-NEXT: v_writelane_b32 v40, s79, 33 +; SDAG-NEXT: v_writelane_b32 v40, s88, 34 +; SDAG-NEXT: v_writelane_b32 v40, s89, 35 +; SDAG-NEXT: v_writelane_b32 v40, s90, 36 +; SDAG-NEXT: v_writelane_b32 v40, s91, 37 +; SDAG-NEXT: v_writelane_b32 v40, s92, 38 +; SDAG-NEXT: v_writelane_b32 v40, s93, 39 +; SDAG-NEXT: v_writelane_b32 v40, s94, 40 +; SDAG-NEXT: v_writelane_b32 v40, s95, 41 +; SDAG-NEXT: v_writelane_b32 v40, s30, 42 +; SDAG-NEXT: v_writelane_b32 v40, s31, 43 ; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; SDAG-NEXT: s_mov_b64 s[8:9], 0 -; SDAG-NEXT: s_addk_i32 s32, 0x400 -; SDAG-NEXT: v_writelane_b32 v40, s95, 43 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35] -; SDAG-NEXT: v_readlane_b32 s95, v40, 43 -; SDAG-NEXT: v_readlane_b32 s94, v40, 42 -; SDAG-NEXT: v_readlane_b32 s93, v40, 41 -; SDAG-NEXT: v_readlane_b32 s92, v40, 40 -; SDAG-NEXT: v_readlane_b32 s91, v40, 39 -; SDAG-NEXT: v_readlane_b32 s90, v40, 38 -; SDAG-NEXT: v_readlane_b32 s89, v40, 37 -; SDAG-NEXT: v_readlane_b32 s88, v40, 36 -; SDAG-NEXT: v_readlane_b32 s79, v40, 35 -; SDAG-NEXT: v_readlane_b32 s78, v40, 34 -; SDAG-NEXT: v_readlane_b32 s77, v40, 33 -; SDAG-NEXT: v_readlane_b32 s76, v40, 32 -; SDAG-NEXT: v_readlane_b32 s75, v40, 31 -; SDAG-NEXT: v_readlane_b32 s74, v40, 30 -; SDAG-NEXT: v_readlane_b32 s73, v40, 29 -; SDAG-NEXT: v_readlane_b32 s72, v40, 28 -; SDAG-NEXT: v_readlane_b32 s31, v40, 27 -; SDAG-NEXT: v_readlane_b32 s30, v40, 26 +; SDAG-NEXT: v_readlane_b32 s30, v40, 42 +; SDAG-NEXT: v_readlane_b32 s31, v40, 43 +; SDAG-NEXT: v_readlane_b32 s95, v40, 
41 +; SDAG-NEXT: v_readlane_b32 s94, v40, 40 +; SDAG-NEXT: v_readlane_b32 s93, v40, 39 +; SDAG-NEXT: v_readlane_b32 s92, v40, 38 +; SDAG-NEXT: v_readlane_b32 s91, v40, 37 +; SDAG-NEXT: v_readlane_b32 s90, v40, 36 +; SDAG-NEXT: v_readlane_b32 s89, v40, 35 +; SDAG-NEXT: v_readlane_b32 s88, v40, 34 +; SDAG-NEXT: v_readlane_b32 s79, v40, 33 +; SDAG-NEXT: v_readlane_b32 s78, v40, 32 +; SDAG-NEXT: v_readlane_b32 s77, v40, 31 +; SDAG-NEXT: v_readlane_b32 s76, v40, 30 +; SDAG-NEXT: v_readlane_b32 s75, v40, 29 +; SDAG-NEXT: v_readlane_b32 s74, v40, 28 +; SDAG-NEXT: v_readlane_b32 s73, v40, 27 +; SDAG-NEXT: v_readlane_b32 s72, v40, 26 ; SDAG-NEXT: v_readlane_b32 s29, v40, 25 ; SDAG-NEXT: v_readlane_b32 s28, v40, 24 ; SDAG-NEXT: v_readlane_b32 s27, v40, 23 @@ -122,6 +122,7 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[34:35] +; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: v_writelane_b32 v40, s5, 1 ; GISEL-NEXT: v_writelane_b32 v40, s6, 2 @@ -148,47 +149,46 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_writelane_b32 v40, s27, 23 ; GISEL-NEXT: v_writelane_b32 v40, s28, 24 ; GISEL-NEXT: v_writelane_b32 v40, s29, 25 -; GISEL-NEXT: v_writelane_b32 v40, s30, 26 -; GISEL-NEXT: v_writelane_b32 v40, s31, 27 -; GISEL-NEXT: v_writelane_b32 v40, s72, 28 -; GISEL-NEXT: v_writelane_b32 v40, s73, 29 -; GISEL-NEXT: v_writelane_b32 v40, s74, 30 -; GISEL-NEXT: v_writelane_b32 v40, s75, 31 -; GISEL-NEXT: v_writelane_b32 v40, s76, 32 -; GISEL-NEXT: v_writelane_b32 v40, s77, 33 -; GISEL-NEXT: v_writelane_b32 v40, s78, 34 -; GISEL-NEXT: v_writelane_b32 v40, s79, 35 -; GISEL-NEXT: v_writelane_b32 v40, s88, 36 -; GISEL-NEXT: v_writelane_b32 v40, s89, 37 -; GISEL-NEXT: v_writelane_b32 v40, s90, 38 -; GISEL-NEXT: v_writelane_b32 v40, s91, 39 -; GISEL-NEXT: v_writelane_b32 v40, s92, 40 -; GISEL-NEXT: 
v_writelane_b32 v40, s93, 41 -; GISEL-NEXT: v_writelane_b32 v40, s94, 42 +; GISEL-NEXT: v_writelane_b32 v40, s72, 26 +; GISEL-NEXT: v_writelane_b32 v40, s73, 27 +; GISEL-NEXT: v_writelane_b32 v40, s74, 28 +; GISEL-NEXT: v_writelane_b32 v40, s75, 29 +; GISEL-NEXT: v_writelane_b32 v40, s76, 30 +; GISEL-NEXT: v_writelane_b32 v40, s77, 31 +; GISEL-NEXT: v_writelane_b32 v40, s78, 32 +; GISEL-NEXT: v_writelane_b32 v40, s79, 33 +; GISEL-NEXT: v_writelane_b32 v40, s88, 34 +; GISEL-NEXT: v_writelane_b32 v40, s89, 35 +; GISEL-NEXT: v_writelane_b32 v40, s90, 36 +; GISEL-NEXT: v_writelane_b32 v40, s91, 37 +; GISEL-NEXT: v_writelane_b32 v40, s92, 38 +; GISEL-NEXT: v_writelane_b32 v40, s93, 39 +; GISEL-NEXT: v_writelane_b32 v40, s94, 40 +; GISEL-NEXT: v_writelane_b32 v40, s95, 41 +; GISEL-NEXT: v_writelane_b32 v40, s30, 42 +; GISEL-NEXT: v_writelane_b32 v40, s31, 43 ; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; GISEL-NEXT: s_mov_b64 s[8:9], 0 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s95, 43 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GISEL-NEXT: v_readlane_b32 s95, v40, 43 -; GISEL-NEXT: v_readlane_b32 s94, v40, 42 -; GISEL-NEXT: v_readlane_b32 s93, v40, 41 -; GISEL-NEXT: v_readlane_b32 s92, v40, 40 -; GISEL-NEXT: v_readlane_b32 s91, v40, 39 -; GISEL-NEXT: v_readlane_b32 s90, v40, 38 -; GISEL-NEXT: v_readlane_b32 s89, v40, 37 -; GISEL-NEXT: v_readlane_b32 s88, v40, 36 -; GISEL-NEXT: v_readlane_b32 s79, v40, 35 -; GISEL-NEXT: v_readlane_b32 s78, v40, 34 -; GISEL-NEXT: v_readlane_b32 s77, v40, 33 -; GISEL-NEXT: v_readlane_b32 s76, v40, 32 -; GISEL-NEXT: v_readlane_b32 s75, v40, 31 -; GISEL-NEXT: v_readlane_b32 s74, v40, 30 -; GISEL-NEXT: v_readlane_b32 s73, v40, 29 -; GISEL-NEXT: v_readlane_b32 s72, v40, 28 -; GISEL-NEXT: v_readlane_b32 s31, v40, 27 -; GISEL-NEXT: v_readlane_b32 s30, v40, 26 +; GISEL-NEXT: v_readlane_b32 s30, v40, 42 +; GISEL-NEXT: v_readlane_b32 s31, v40, 43 +; 
GISEL-NEXT: v_readlane_b32 s95, v40, 41 +; GISEL-NEXT: v_readlane_b32 s94, v40, 40 +; GISEL-NEXT: v_readlane_b32 s93, v40, 39 +; GISEL-NEXT: v_readlane_b32 s92, v40, 38 +; GISEL-NEXT: v_readlane_b32 s91, v40, 37 +; GISEL-NEXT: v_readlane_b32 s90, v40, 36 +; GISEL-NEXT: v_readlane_b32 s89, v40, 35 +; GISEL-NEXT: v_readlane_b32 s88, v40, 34 +; GISEL-NEXT: v_readlane_b32 s79, v40, 33 +; GISEL-NEXT: v_readlane_b32 s78, v40, 32 +; GISEL-NEXT: v_readlane_b32 s77, v40, 31 +; GISEL-NEXT: v_readlane_b32 s76, v40, 30 +; GISEL-NEXT: v_readlane_b32 s75, v40, 29 +; GISEL-NEXT: v_readlane_b32 s74, v40, 28 +; GISEL-NEXT: v_readlane_b32 s73, v40, 27 +; GISEL-NEXT: v_readlane_b32 s72, v40, 26 ; GISEL-NEXT: v_readlane_b32 s29, v40, 25 ; GISEL-NEXT: v_readlane_b32 s28, v40, 24 ; GISEL-NEXT: v_readlane_b32 s27, v40, 23 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 234eaa8af7edf..5ada43298deb6 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -133,16 +133,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: 
s_or_saveexec_b64 s[36:37], -1 @@ -162,16 +162,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: v_mov_b32_e32 v0, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -191,17 +191,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_mov_b32_e32 v0, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -221,16 +220,16 @@ 
define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -253,19 +252,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; 
GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -284,19 +283,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_signext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -315,19 +314,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt 
vmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -346,19 +345,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_signext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -382,19 +381,19 @@ define amdgpu_gfx void 
@test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -413,19 +412,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_zeroext@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: 
buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -444,19 +443,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -475,19 +474,19 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc -; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 
16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc +; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -513,14 +512,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -540,15 +539,15 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -568,16 +567,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -596,16 +595,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; 
GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -625,15 +624,15 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -656,17 +655,17 @@ define amdgpu_gfx void 
@test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -685,17 +684,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_signext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -714,18 +713,18 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_i8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -743,18 +742,18 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: 
global_load_i8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -773,17 +772,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_signext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -807,17 +806,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -836,17 +835,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_zeroext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -865,18 +864,18 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -894,18 +893,18 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, 
s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -924,17 +923,17 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] 
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -960,14 +959,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -987,15 +986,15 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1015,16 
+1014,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7b ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1043,16 +1042,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; 
GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1072,15 +1071,15 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1103,17 +1102,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo ; 
GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1132,17 +1131,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_signext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_signext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1161,18 +1160,18 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; 
GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1190,18 +1189,18 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; 
GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1220,17 +1219,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_signext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_signext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1254,17 +1253,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: 
v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1283,17 +1282,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_zeroext@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_zeroext@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1312,18 +1311,18 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; 
GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off glc dlc ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1341,18 +1340,18 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off glc dlc ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, 
external_void_func_i16_zeroext@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -1371,17 +1370,17 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_zeroext@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_zeroext@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1407,14 +1406,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: 
s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1434,15 +1433,15 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1462,16 +1461,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 
; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1491,15 +1490,15 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1524,15 +1523,15 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1552,16 +1551,16 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1581,16 +1580,16 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 
+; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1610,16 +1609,16 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1642,18 +1641,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: 
s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1672,18 +1671,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1702,19 +1701,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; 
GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1733,18 +1731,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1770,17 +1768,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1800,18 +1798,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1831,17 +1829,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1861,18 +1859,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64@abs32@hi ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s0, external_void_func_v2i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -1895,20 +1893,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -1927,20 +1925,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 
s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64@abs32@hi -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -1959,19 +1957,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: 
v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1990,20 +1987,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64@abs32@hi -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2029,22 +2026,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 
v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2063,22 +2060,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-NEXT: v_mov_b32_e32 v6, 3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 @@ -2097,20 +2094,20 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 1 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2129,22 +2126,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 
v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 3 -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 4 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2171,14 +2168,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2198,15 +2195,15 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2226,16 +2223,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x4400 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -2254,16 +2251,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: 
v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -2283,15 +2280,15 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2316,14 
+2313,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2343,15 +2340,15 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2371,16 +2368,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 
s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2400,15 +2397,15 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2433,15 +2430,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 
s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2461,16 +2458,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2490,16 +2487,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo ; GFX11-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2519,16 +2516,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2553,16 +2550,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: 
s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2582,17 +2579,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2612,17 +2609,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2642,17 +2639,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2677,6 +2674,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; 
GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 @@ -2684,11 +2683,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2708,19 +2705,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-NEXT: v_mov_b32_e32 v3, -1.0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2740,18 +2737,18 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5f32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2771,19 +2768,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, -1.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2808,15 +2805,15 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2836,16 +2833,16 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, 
v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2865,16 +2862,16 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -2894,16 +2891,16 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -2928,17 +2925,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2958,18 +2955,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -2989,17 +2986,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3019,18 +3016,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 
; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3055,6 +3052,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -3063,11 +3062,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3087,20 +3084,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; 
GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3120,18 +3117,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3151,20 +3148,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3187,21 +3184,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_ushort v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 
1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3220,21 +3217,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3253,22 +3250,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, 
s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -3286,22 +3283,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[0:1], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, 
s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -3320,21 +3317,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, 
v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3358,21 +3355,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3391,21 +3388,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3424,21 +3421,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3457,21 +3454,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3495,22 +3492,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: 
v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3529,22 +3526,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3563,22 +3560,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 
v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3597,22 +3594,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v3, 24, v0 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3636,15 +3633,15 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3652,8 +3649,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, v5 ; GFX9-NEXT: v_mov_b32_e32 v4, v6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3672,15 +3669,15 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; 
GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3688,8 +3685,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v4, v6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3708,15 +3705,15 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[5:6], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX11-NEXT: 
v_lshrrev_b32_e32 v1, 8, v5 @@ -3724,8 +3721,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: v_mov_b32_e32 v4, v6 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3744,15 +3741,15 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -3760,8 +3757,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3785,15 +3782,15 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3804,8 +3801,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, v8 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -3824,15 +3821,15 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v8i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3843,8 +3840,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v8 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -3863,15 +3860,15 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3881,8 +3878,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v8 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; 
GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -3901,15 +3898,15 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -3920,8 +3917,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, v1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, v8 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -3945,18 +3942,18 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, 
s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v4, 16 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i8@abs32@hi +; GFX9-NEXT: global_load_dwordx4 v[16:19], v[4:5], off ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -3996,8 +3993,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v18, v33 ; GFX9-NEXT: v_mov_b32_e32 v19, v34 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4016,19 +4013,19 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v32i8@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i8@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4068,8 +4065,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v18, v33 ; GFX10-NEXT: v_mov_b32_e32 v19, v34 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4088,17 +4085,18 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 16 ; GFX11-NEXT: v_mov_b32_e32 v5, 0 -; GFX11-NEXT: v_writelane_b32 v40, s0, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: global_load_b128 v[16:19], v[4:5], off -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_lshrrev_b32_e32 v35, 8, 
v0 ; GFX11-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4135,8 +4133,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX11-NEXT: v_dual_mov_b32 v17, v32 :: v_dual_mov_b32 v18, v33 ; GFX11-NEXT: v_mov_b32_e32 v19, v34 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -4155,19 +4153,19 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i8@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i8@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[4:5], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -4207,8 +4205,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v18, v33 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v19, v34 ; GFX10-SCRATCH-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4233,24 +4231,24 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ubyte v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_byte v[40:41], v0, off ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4269,24 +4267,24 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_byte v[40:41], v0, off ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -4305,25 +4303,26 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: 
v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: global_store_b8 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4341,25 +4340,26 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v[40:41], off -; GFX11-FAKE16-NEXT: 
v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: global_store_b8 v[40:41], v0, off ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4378,24 +4378,24 @@ define amdgpu_gfx void @test_call_external_void_func_i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_byte v[40:41], v0, off ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:4 -; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -4421,18 +4421,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_ushort v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -4442,8 +4442,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 
s[36:37], -1 @@ -4462,24 +4462,24 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i8_ret@abs32@lo ; GFX10-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4503,18 +4503,19 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -4522,8 +4523,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 @@ -4547,18 +4548,19 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: 
v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_u16 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) @@ -4566,8 +4568,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 @@ -4591,24 +4593,24 @@ define amdgpu_gfx void @test_call_external_void_func_v2i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: global_load_ushort v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4639,17 +4641,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: 
v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4663,8 +4665,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4683,17 +4685,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4701,8 +4703,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_mov_b32_e32 v3, 2 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_readlane_b32 
s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 @@ -4727,36 +4729,37 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4) ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 
s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_store_b8 v[3:4], v2, off ; GFX11-TRUE16-NEXT: global_store_b16 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -4774,18 +4777,19 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4795,8 +4799,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; 
GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v42, 2 @@ -4822,17 +4826,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4840,8 +4844,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 @@ -4873,17 +4877,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dword v0, v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 @@ -4898,8 +4902,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -4918,17 +4922,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, 
off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v0, v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4936,8 +4940,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -4963,18 +4967,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: 
scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -4985,11 +4990,11 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX11-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: global_store_b32 v[40:41], v0, off ; GFX11-TRUE16-NEXT: s_clause 0x1 @@ -5011,18 +5016,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, 
s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b32 v0, v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5033,8 +5039,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5064,17 +5070,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, 
s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v0, v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5082,8 +5088,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5116,17 +5122,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; 
GFX9-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5146,8 +5152,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5166,17 +5172,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: 
s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5186,8 +5192,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5216,18 +5222,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5242,7 +5249,7 @@ define 
amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v0.h -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_store_b8 v[2:3], v4, off @@ -5251,7 +5258,6 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -5269,18 +5275,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[5:6], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5293,8 +5300,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-FAKE16-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5327,17 +5334,17 @@ define amdgpu_gfx void @test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[5:6], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b64 v[3:4], 24, v[5:6] ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v1, 8, v5 @@ -5347,8 +5354,8 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5i8_ret() #0 { ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -5384,17 +5391,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX9-NEXT: v_writelane_b32 v42, s34, 2 -; GFX9-NEXT: v_writelane_b32 v42, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5419,8 +5426,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; 
GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5439,17 +5446,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-NEXT: v_writelane_b32 v42, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i8_ret@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5464,12 +5471,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 +; GFX10-NEXT: v_readlane_b32 s30, v42, 0 ; GFX10-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa 
v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 @@ -5494,18 +5501,19 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-TRUE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5527,14 +5535,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-TRUE16-NEXT: v_or_b16 v4.h, v1.h, v0.h ; GFX11-TRUE16-NEXT: v_or_b16 v4.l, v3.h, v2.h ; GFX11-TRUE16-NEXT: v_or_b16 v3.h, v2.l, v3.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: v_or_b16 v3.l, v0.l, v1.l ; 
GFX11-TRUE16-NEXT: global_store_b64 v[40:41], v[3:4], off ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v42, 1 -; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -5552,18 +5559,19 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v41, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: global_load_b64 v[0:1], v[40:41], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s30, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5585,12 +5593,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, v4, v5 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v5, v6, v7 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 +; 
GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff, v4 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v1, v2, v3 @@ -5617,17 +5625,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i8_ret@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[40:41], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v8, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v2, 16, v0 @@ -5642,12 +5650,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i8_ret() #0 { ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v7, 8, v7 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SCRATCH-NEXT: v_lshlrev_b16 v3, 8, v3 -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v42, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 @@ -5679,22 +5687,22 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v44, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: v_mov_b32_e32 v42, 16 ; GFX9-NEXT: v_mov_b32_e32 v41, 0 ; GFX9-NEXT: v_mov_b32_e32 v43, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX9-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX9-NEXT: v_writelane_b32 v44, s34, 2 -; GFX9-NEXT: v_writelane_b32 v44, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; 
GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -5781,8 +5789,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: v_readlane_b32 s30, v44, 0 +; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v44, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -5801,23 +5809,23 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v44, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-NEXT: v_writelane_b32 v44, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i8_ret@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i8_ret@abs32@lo ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[40:41], 
off ; GFX10-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-NEXT: v_writelane_b32 v44, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -5904,8 +5912,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v44, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -5924,22 +5932,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-TRUE16-NEXT: s_clause 0x3 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-TRUE16-NEXT: ; meta instruction ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v40, 0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi -; GFX11-TRUE16-NEXT: global_load_b128 
v[0:3], v[40:41], off ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo +; GFX11-TRUE16-NEXT: s_clause 0x1 +; GFX11-TRUE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-TRUE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s30, 0 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 -; GFX11-TRUE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6032,8 +6044,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v44, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -6051,22 +6063,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 ; GFX11-FAKE16-NEXT: s_clause 0x3 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-FAKE16-NEXT: ; meta instruction ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v40, 0 ; 
GFX11-FAKE16-NEXT: v_dual_mov_b32 v41, 0 :: v_dual_mov_b32 v42, 16 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v43, 0 -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi -; GFX11-FAKE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo +; GFX11-FAKE16-NEXT: s_clause 0x1 +; GFX11-FAKE16-NEXT: global_load_b128 v[0:3], v[40:41], off ; GFX11-FAKE16-NEXT: global_load_b128 v[16:19], v[42:43], off -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s30, 0 -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 -; GFX11-FAKE16-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6183,8 +6199,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v44, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -6203,23 +6219,23 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:12 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v42, s33 offset:4 ; 4-byte Folded Spill ; 
GFX10-SCRATCH-NEXT: scratch_store_dword off, v43, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v42, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v43, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s0, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i8_ret@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i8_ret@abs32@lo ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[40:41], off ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[16:19], v[42:43], off -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v35, 8, v0 ; GFX10-SCRATCH-NEXT: v_lshrrev_b32_e32 v36, 16, v0 @@ -6306,8 +6322,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i8_ret() #0 { ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 offset:4 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v44, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6334,16 +6350,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, 
external_void_func_v2i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6362,16 +6378,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6390,17 +6406,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; 
GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6419,16 +6435,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: 
s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6452,16 +6468,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6480,16 +6496,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6508,17 +6524,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6537,16 +6553,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6570,16 +6586,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6598,16 +6614,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: 
global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6626,17 +6642,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, 
-1 @@ -6655,16 +6671,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6690,15 +6706,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: 
s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6718,16 +6734,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6747,16 +6763,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6776,16 +6792,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6810,15 +6826,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, 
v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6838,16 +6854,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6867,17 +6883,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -6897,16 +6913,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -6929,16 +6945,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, 
external_void_func_v4i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -6957,16 +6973,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -6985,17 +7001,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7014,16 +7030,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7049,15 +7065,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 
2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7077,16 +7093,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7106,17 +7122,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7136,16 +7152,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7168,16 +7184,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 
4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7196,16 +7212,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7224,17 +7240,17 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2f16() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7253,16 +7269,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dword v0, v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v2f16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7286,16 +7302,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7314,16 +7330,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 
0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7342,17 +7358,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7371,16 +7387,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], 
off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7406,15 +7422,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7434,16 +7450,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7463,16 +7479,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7492,16 +7508,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, 
s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7526,16 +7542,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7555,17 +7571,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 
2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7585,17 +7601,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_mov_b32_e32 v2, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7615,17 +7631,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7650,17 +7666,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7680,18 +7696,18 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-NEXT: v_mov_b32_e32 v3, 6 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7711,17 +7727,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 ; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 
s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7741,18 +7757,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7775,16 +7791,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX9-NEXT: s_mov_b32 
s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7803,16 +7819,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7831,17 +7847,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7860,16 +7876,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -7895,17 +7911,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 
2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -7925,18 +7941,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -7956,17 +7972,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: 
v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -7986,18 +8002,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8022,6 +8038,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 @@ -8029,11 +8047,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8053,19 +8069,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, 
s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8085,18 +8101,18 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 5 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8116,19 +8132,19 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v5i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8152,19 +8168,19 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8184,20 +8200,21 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; 
GFX10-NEXT: v_mov_b32_e32 v8, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8217,21 +8234,21 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_load_b128 v[0:3], v4, s[0:1] ; GFX11-NEXT: global_load_b128 v[4:7], v4, s[0:1] offset:16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -8251,20 +8268,21 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8291,6 +8309,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 1 @@ -8301,11 +8321,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v5, 6 ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; 
GFX9-NEXT: v_mov_b32_e32 v7, 8 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8325,22 +8343,22 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8360,19 +8378,19 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: 
v_dual_mov_b32 v1, 2 ; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 ; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8392,22 +8410,22 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 6 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 7 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8431,10 +8449,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 @@ -8442,10 +8461,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8465,22 +8483,23 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 
0x3 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8500,23 +8519,23 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v12, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: global_load_b128 v[0:3], v12, s[0:1] ; GFX11-NEXT: global_load_b128 v[4:7], v12, s[0:1] offset:16 ; GFX11-NEXT: global_load_b128 v[8:11], v12, s[0:1] offset:32 ; GFX11-NEXT: global_load_b128 v[12:15], v12, s[0:1] offset:48 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 
s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8536,22 +8555,23 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v16, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x3 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v16, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[4:7], v16, s[0:1] offset:16 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[8:11], v16, s[0:1] offset:32 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[12:15], v16, s[0:1] offset:48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8577,10 +8597,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: 
v_mov_b32_e32 v28, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8593,10 +8614,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8616,9 +8636,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] @@ -8629,13 +8651,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v32i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8655,9 +8676,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v28, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 ; GFX11-NEXT: global_load_b128 v[0:3], v28, s[0:1] @@ -8668,14 +8691,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -8695,9 +8716,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: 
s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x7 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] @@ -8708,13 +8731,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8740,10 +8762,12 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: global_load_dword v32, v[0:1], off 
-; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] ; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 @@ -8754,15 +8778,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8782,9 +8804,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 @@ -8796,15 +8820,14 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8824,9 +8847,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: v_mov_b32_e32 v28, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: global_load_b32 v32, v[0:1], off ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x7 @@ -8838,15 +8863,13 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: global_load_b128 v[20:23], v28, s[0:1] offset:80 ; GFX11-NEXT: global_load_b128 v[24:27], v28, s[0:1] offset:96 ; GFX11-NEXT: global_load_b128 v[28:31], v28, s[0:1] offset:112 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(8) ; GFX11-NEXT: scratch_store_b32 off, v32, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -8866,9 +8889,11 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v32, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: global_load_dword v33, v[0:1], off ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x7 @@ -8880,15 +8905,14 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:96 ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:112 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -8915,23 +8939,23 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: 
v_writelane_b32 v42, s34, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v42, s30, 0 +; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v40, v0 ; GFX9-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo ; GFX9-NEXT: v_mov_b32_e32 v0, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v42, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[40:41], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: v_readlane_b32 s30, v42, 0 +; GFX9-NEXT: v_readlane_b32 s31, v42, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v42, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -8951,24 +8975,24 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v42, s34, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s35, external_i32_func_i32@abs32@hi -; GFX10-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-NEXT: s_mov_b32 s34, external_i32_func_i32@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v41, v1 -; GFX10-NEXT: v_writelane_b32 v42, s31, 1 ; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[40:41], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v42, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -8988,24 +9012,26 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v42, s0, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 +; GFX11-NEXT: v_writelane_b32 v42, s30, 0 +; GFX11-NEXT: v_writelane_b32 v42, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v41, v1 :: v_dual_mov_b32 v40, v0 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 -; GFX11-NEXT: v_writelane_b32 v42, s30, 0 ; GFX11-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v42, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: global_store_b32 v[40:41], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GFX11-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-NEXT: v_readlane_b32 s30, v42, 0 +; GFX11-NEXT: v_readlane_b32 s31, v42, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v42, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9025,24 +9051,24 @@ define amdgpu_gfx void 
@test_call_external_i32_func_i32_imm(ptr addrspace(1) %ou ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s0, 2 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v40, v0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_i32_func_i32@abs32@hi -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s30, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_i32_func_i32@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v41, v1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v42, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: global_store_dword v[40:41], v0, off ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v42, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v42, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v42, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9067,19 +9093,19 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: 
s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9099,20 +9125,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] ; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_struct_i8_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9132,21 +9159,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 
off, v40, s33 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: global_load_d16_u8 v0, v1, s[0:1] ; GFX11-TRUE16-NEXT: global_load_b32 v1, v1, s[0:1] offset:4 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9165,21 +9192,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: global_load_u8 v0, v1, s[0:1] ; GFX11-FAKE16-NEXT: 
global_load_b32 v1, v1, s[0:1] offset:4 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9199,20 +9226,21 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: global_load_ubyte v0, v2, s[0:1] ; GFX10-SCRATCH-NEXT: global_load_dword v1, v2, s[0:1] offset:4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 
s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9237,20 +9265,20 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9270,19 +9298,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, 
s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9302,21 +9330,20 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 ; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s33 -; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9335,20 +9362,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX11-FAKE16-NEXT: 
scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s33 -; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 16 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 @@ -9368,19 +9394,19 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_byval_struct_i8_i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_byval_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s33 ; 
GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s33 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9408,24 +9434,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; 
GFX9-NEXT: s_waitcnt vmcnt(0) @@ -9449,25 +9475,25 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -9492,24 +9518,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-TRUE16-NEXT: s_add_i32 s32, s32, 32 +; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 3 ; GFX11-TRUE16-NEXT: s_add_i32 s2, s33, 8 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 8 -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-TRUE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX11-TRUE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; GFX11-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-TRUE16-NEXT: s_clause 0x1 ; GFX11-TRUE16-NEXT: scratch_load_d16_u8 v0, off, s33 offset:8 ; GFX11-TRUE16-NEXT: scratch_load_b32 v1, off, s33 offset:12 -; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-TRUE16-NEXT: s_mov_b32 s32, s33 ; GFX11-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) @@ -9533,23 +9559,23 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:16 ; 4-byte Folded Spill ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-FAKE16-NEXT: s_add_i32 s32, s32, 32 +; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 ; GFX11-FAKE16-NEXT: s_add_i32 s2, s33, 8 -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 +; 
GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_store_b8 off, v0, s33 ; GFX11-FAKE16-NEXT: scratch_store_b32 off, v1, s33 offset:4 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s33 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX11-FAKE16-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; GFX11-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-FAKE16-NEXT: s_clause 0x1 ; GFX11-FAKE16-NEXT: scratch_load_u8 v0, off, s33 offset:8 ; GFX11-FAKE16-NEXT: scratch_load_b32 v1, off, s33 offset:12 -; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-FAKE16-NEXT: s_mov_b32 s32, s33 ; GFX11-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) @@ -9574,24 +9600,24 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 8 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s33, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo ; GFX10-SCRATCH-NEXT: 
scratch_store_byte off, v0, s33 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v1, s33 offset:4 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s33 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -9633,11 +9659,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16i8@abs32@hi @@ -9662,8 +9688,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: 
v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -9683,11 +9709,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 @@ -9713,8 +9739,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -9734,11 +9760,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1] ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i8@abs32@hi @@ -9761,8 +9787,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX11-NEXT: 
v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v1, v16 ; GFX11-NEXT: v_dual_mov_b32 v2, v17 :: v_dual_mov_b32 v3, v18 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -9782,11 +9808,11 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 @@ -9812,8 +9838,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -9838,49 +9864,49 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: 
v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s30, 14 +; GFX9-NEXT: v_writelane_b32 v40, s31, 15 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:16 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:20 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 ; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: 
v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 14 +; GFX9-NEXT: v_readlane_b32 s31, v40, 15 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload @@ -9898,50 +9924,50 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-NEXT: v_writelane_b32 v40, s48, 6 +; GFX10-NEXT: 
v_writelane_b32 v40, s49, 7 +; GFX10-NEXT: v_writelane_b32 v40, s50, 8 +; GFX10-NEXT: v_writelane_b32 v40, s51, 9 +; GFX10-NEXT: v_writelane_b32 v40, s52, 10 +; GFX10-NEXT: v_writelane_b32 v40, s53, 11 +; GFX10-NEXT: v_writelane_b32 v40, s54, 12 +; GFX10-NEXT: v_writelane_b32 v40, s55, 13 +; GFX10-NEXT: v_writelane_b32 v40, s30, 14 +; GFX10-NEXT: v_writelane_b32 v40, s31, 15 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:16 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:20 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; GFX10-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-NEXT: v_writelane_b32 v40, s48, 8 -; GFX10-NEXT: v_writelane_b32 v40, s49, 9 -; GFX10-NEXT: v_writelane_b32 v40, s50, 10 -; GFX10-NEXT: v_writelane_b32 v40, s51, 11 -; GFX10-NEXT: v_writelane_b32 v40, s52, 12 -; GFX10-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-NEXT: v_writelane_b32 v40, s54, 14 -; GFX10-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s55, v40, 15 -; GFX10-NEXT: v_readlane_b32 s54, v40, 14 -; GFX10-NEXT: v_readlane_b32 s53, v40, 13 -; GFX10-NEXT: v_readlane_b32 s52, v40, 12 -; GFX10-NEXT: v_readlane_b32 s51, v40, 11 -; GFX10-NEXT: v_readlane_b32 s50, v40, 10 -; GFX10-NEXT: v_readlane_b32 s49, v40, 9 -; GFX10-NEXT: v_readlane_b32 s48, 
v40, 8 -; GFX10-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 14 +; GFX10-NEXT: v_readlane_b32 s31, v40, 15 +; GFX10-NEXT: v_readlane_b32 s55, v40, 13 +; GFX10-NEXT: v_readlane_b32 s54, v40, 12 +; GFX10-NEXT: v_readlane_b32 s53, v40, 11 +; GFX10-NEXT: v_readlane_b32 s52, v40, 10 +; GFX10-NEXT: v_readlane_b32 s51, v40, 9 +; GFX10-NEXT: v_readlane_b32 s50, v40, 8 +; GFX10-NEXT: v_readlane_b32 s49, v40, 7 +; GFX10-NEXT: v_readlane_b32 s48, v40, 6 +; GFX10-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload @@ -9959,47 +9985,47 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:24 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_add_i32 s32, s32, 32 +; GFX11-NEXT: v_writelane_b32 v40, s34, 0 +; GFX11-NEXT: v_writelane_b32 v40, s35, 1 +; GFX11-NEXT: v_writelane_b32 v40, s36, 2 +; GFX11-NEXT: v_writelane_b32 v40, s37, 3 +; GFX11-NEXT: v_writelane_b32 v40, s38, 4 +; GFX11-NEXT: v_writelane_b32 v40, s39, 5 +; GFX11-NEXT: v_writelane_b32 v40, s48, 6 +; GFX11-NEXT: v_writelane_b32 v40, s49, 7 +; GFX11-NEXT: v_writelane_b32 v40, s50, 8 +; GFX11-NEXT: v_writelane_b32 v40, s51, 9 +; GFX11-NEXT: v_writelane_b32 v40, s52, 10 +; GFX11-NEXT: v_writelane_b32 v40, s53, 11 +; GFX11-NEXT: 
v_writelane_b32 v40, s54, 12 +; GFX11-NEXT: v_writelane_b32 v40, s55, 13 +; GFX11-NEXT: v_writelane_b32 v40, s30, 14 +; GFX11-NEXT: v_writelane_b32 v40, s31, 15 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 offset:16 ; GFX11-NEXT: scratch_load_b32 v31, off, s33 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 -; GFX11-NEXT: v_writelane_b32 v40, s34, 2 -; GFX11-NEXT: v_writelane_b32 v40, s35, 3 -; GFX11-NEXT: v_writelane_b32 v40, s36, 4 -; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s48, 8 -; GFX11-NEXT: v_writelane_b32 v40, s49, 9 -; GFX11-NEXT: v_writelane_b32 v40, s50, 10 -; GFX11-NEXT: v_writelane_b32 v40, s51, 11 -; GFX11-NEXT: v_writelane_b32 v40, s52, 12 -; GFX11-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-NEXT: v_writelane_b32 v40, s54, 14 -; GFX11-NEXT: v_writelane_b32 v40, s55, 15 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s55, v40, 15 -; GFX11-NEXT: v_readlane_b32 s54, v40, 14 -; GFX11-NEXT: v_readlane_b32 s53, v40, 13 -; GFX11-NEXT: v_readlane_b32 s52, v40, 12 -; GFX11-NEXT: v_readlane_b32 s51, v40, 11 -; GFX11-NEXT: v_readlane_b32 s50, v40, 10 -; GFX11-NEXT: v_readlane_b32 s49, v40, 9 -; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 -; GFX11-NEXT: v_readlane_b32 s37, v40, 5 -; GFX11-NEXT: v_readlane_b32 s36, v40, 4 -; GFX11-NEXT: v_readlane_b32 s35, v40, 3 -; GFX11-NEXT: v_readlane_b32 s34, v40, 2 -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 -; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s30, v40, 14 +; 
GFX11-NEXT: v_readlane_b32 s31, v40, 15 +; GFX11-NEXT: v_readlane_b32 s55, v40, 13 +; GFX11-NEXT: v_readlane_b32 s54, v40, 12 +; GFX11-NEXT: v_readlane_b32 s53, v40, 11 +; GFX11-NEXT: v_readlane_b32 s52, v40, 10 +; GFX11-NEXT: v_readlane_b32 s51, v40, 9 +; GFX11-NEXT: v_readlane_b32 s50, v40, 8 +; GFX11-NEXT: v_readlane_b32 s49, v40, 7 +; GFX11-NEXT: v_readlane_b32 s48, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 5 +; GFX11-NEXT: v_readlane_b32 s38, v40, 4 +; GFX11-NEXT: v_readlane_b32 s37, v40, 3 +; GFX11-NEXT: v_readlane_b32 s36, v40, 2 +; GFX11-NEXT: v_readlane_b32 s35, v40, 1 +; GFX11-NEXT: v_readlane_b32 s34, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:24 ; 4-byte Folded Reload @@ -10017,47 +10043,47 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 15 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: 
scratch_load_dwordx2 v[32:33], off, s33 offset:16 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, byval_align16_f64_arg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, byval_align16_f64_arg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s33 offset:24 ; 4-byte Folded Reload @@ -10083,16 +10109,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 
s[36:37], -1 @@ -10112,16 +10138,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: v_mov_b32_e32 v0, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_i1_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_i1_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -10141,17 +10167,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: v_mov_b32_e32 v0, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, 1 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: scratch_store_b8 off, v0, s32 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, 
-1 @@ -10171,16 +10196,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i1_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i1_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -10204,16 +10229,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 
s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10234,16 +10259,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i8_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i8_inreg@abs32@lo +; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -10264,17 +10289,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo +; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 
s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10295,16 +10319,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i8_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i8_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10329,16 +10353,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: 
v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10359,16 +10383,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i16_inreg@abs32@lo +; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -10389,17 +10413,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo +; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10420,16 +10443,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10454,16 +10477,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, 
external_void_func_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 42 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -10484,16 +10507,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -10514,17 +10537,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 42 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 
2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 42 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -10545,16 +10567,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -10579,18 +10601,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: 
v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -10612,18 +10634,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_i64_inreg@abs32@lo +; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -10645,19 +10667,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 
v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_movk_i32 s4, 0x7b ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo +; GFX11-NEXT: s_movk_i32 s4, 0x7b +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -10679,18 +10700,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -10716,20 +10737,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -10753,20 +10774,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: 
s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -10790,21 +10811,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -10828,20 +10849,20 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -10870,22 +10891,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 
-; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -10909,22 +10930,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 3 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -10948,23 +10969,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_v2i64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 3 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_mov_b32 s5, 2 +; GFX11-NEXT: s_mov_b32 s6, 3 +; GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -10988,22 +11008,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, 
external_void_func_v2i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -11031,24 +11051,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -11074,24 +11094,24 @@ define 
amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 +; GFX10-NEXT: s_mov_b32 s8, 1 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -11117,25 +11137,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_v3i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 -; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 6 ; GFX11-NEXT: v_writelane_b32 v40, s31, 7 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-NEXT: s_mov_b32 s8, 1 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6 +; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 @@ -11161,24 +11180,24 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-SCRATCH-NEXT: s_mov_b64 
s[0:1], 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -11211,28 +11230,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 ; GFX9-NEXT: s_mov_b32 s11, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: 
v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -11260,28 +11279,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-NEXT: s_mov_b32 s10, 3 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 +; GFX10-NEXT: s_mov_b32 s8, 1 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s9, 2 +; GFX10-NEXT: s_mov_b32 s10, 3 +; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -11309,29 +11328,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: 
s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 -; GFX11-NEXT: s_mov_b64 s[0:1], 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 1 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 -; GFX11-NEXT: s_mov_b32 s9, 2 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6 -; GFX11-NEXT: s_mov_b32 s10, 3 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 -; GFX11-NEXT: s_mov_b32 s11, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8 ; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_mov_b64 s[0:1], 0 +; GFX11-NEXT: s_mov_b32 s8, 1 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s9, 2 +; GFX11-NEXT: s_mov_b32 s10, 3 +; GFX11-NEXT: s_mov_b32 s11, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -11359,28 +11377,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -11414,16 +11432,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi ; GFX9-NEXT: 
s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX9-NEXT: s_movk_i32 s4, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -11444,16 +11462,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f16_inreg@abs32@lo +; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -11474,17 +11492,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_movk_i32 s4, 0x4400 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; 
GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo +; GFX11-NEXT: s_movk_i32 s4, 0x4400 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -11505,16 +11522,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -11539,16 +11556,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; 
GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -11569,16 +11586,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 4.0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -11599,17 +11616,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 4.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 4.0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -11630,16 +11646,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -11664,18 +11680,18 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -11697,18 +11713,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, 
v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -11730,19 +11746,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -11764,18 +11779,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -11801,20 +11816,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -11837,20 +11852,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, 
external_void_func_v3f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: s_mov_b32 s5, 2.0 +; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -11873,21 +11888,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 4.0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_mov_b32 s5, 2.0 +; GFX11-NEXT: s_mov_b32 s6, 4.0 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 
s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3 +; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 @@ -11910,20 +11924,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -11950,12 +11964,14 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 7 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: 
v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1.0 @@ -11963,11 +11979,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 4.0 ; GFX9-NEXT: s_mov_b32 s7, -1.0 ; GFX9-NEXT: s_mov_b32 s8, 0.5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -11992,24 +12006,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, -1.0 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 0.5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5f32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5f32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1.0 +; GFX10-NEXT: 
s_mov_b32 s5, 2.0 +; GFX10-NEXT: s_mov_b32 s6, 4.0 +; GFX10-NEXT: s_mov_b32 s7, -1.0 +; GFX10-NEXT: s_mov_b32 s8, 0.5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -12034,25 +12048,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1.0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 4.0 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, -1.0 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 0.5 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1.0 +; GFX11-NEXT: s_mov_b32 s5, 2.0 +; GFX11-NEXT: s_mov_b32 s6, 4.0 +; GFX11-NEXT: s_mov_b32 s7, -1.0 +; GFX11-NEXT: s_mov_b32 s8, 0.5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5 +; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 @@ -12077,24 +12090,24 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5f32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5f32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -12123,18 +12136,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: 
v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12156,18 +12169,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_f64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12189,19 +12202,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 0x40100000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s5, 0x40100000 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12223,18 +12235,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_f64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -12260,22 +12272,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -12299,22 +12311,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, 
s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 2.0 +; GFX10-NEXT: s_mov_b32 s6, 0 +; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -12338,23 +12350,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2.0 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 0 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 0x40100000 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s5, 2.0 +; GFX11-NEXT: s_mov_b32 s6, 0 +; GFX11-NEXT: s_mov_b32 s7, 0x40100000 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: 
v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -12378,22 +12389,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -12421,6 +12432,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: 
v_writelane_b32 v40, s34, 8 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -12428,6 +12440,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0 @@ -12436,11 +12449,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -12466,26 +12477,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 8 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 0 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; 
GFX10-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f64_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f64_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s5, 2.0 +; GFX10-NEXT: s_mov_b32 s6, 0 +; GFX10-NEXT: s_mov_b32 s7, 0x40100000 +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -12511,27 +12522,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 8 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s6, 2 +; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s8, 4 +; GFX11-NEXT: v_writelane_b32 v40, s9, 5 +; GFX11-NEXT: v_writelane_b32 v40, s30, 6 +; GFX11-NEXT: v_writelane_b32 v40, s31, 7 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b32 s5, 2.0 -; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: s_mov_b32 s6, 0 -; GFX11-NEXT: v_writelane_b32 v40, s7, 3 ; GFX11-NEXT: s_mov_b32 s7, 0x40100000 -; GFX11-NEXT: v_writelane_b32 v40, s8, 4 ; GFX11-NEXT: s_mov_b32 s8, 0 -; GFX11-NEXT: 
v_writelane_b32 v40, s9, 5 ; GFX11-NEXT: s_mov_b32 s9, 0x40200000 -; GFX11-NEXT: v_writelane_b32 v40, s30, 6 -; GFX11-NEXT: v_writelane_b32 v40, s31, 7 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s30, v40, 6 +; GFX11-NEXT: v_readlane_b32 s31, v40, 7 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 @@ -12557,26 +12567,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 8 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f64_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f64_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -12606,16 +12616,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -12638,14 +12648,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; 
GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -12668,15 +12678,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -12699,14 +12709,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -12732,17 +12742,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12767,14 +12777,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 
s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12799,15 +12809,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12832,14 +12842,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -12866,17 
+12876,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -12901,14 +12911,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -12933,15 +12943,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; 
GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -12966,14 +12976,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13000,18 +13010,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 
4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13033,18 +13043,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i16_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13066,19 +13076,18 @@ 
define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0x20001 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 3 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 0x20001 +; GFX11-NEXT: s_mov_b32 s5, 3 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13100,18 +13109,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 
s0, external_void_func_v3i16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13137,18 +13146,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13170,18 +13179,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-NEXT: v_writelane_b32 v40, 
s5, 1 -; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3f16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3f16_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13203,19 +13212,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_movk_i32 s5, 0x4400 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX11-NEXT: s_movk_i32 s5, 0x4400 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13237,18 +13245,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3f16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3f16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13274,17 +13282,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: 
v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13309,14 +13317,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13341,15 +13349,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; 
GFX11-NEXT: s_mov_b32 s32, s33 @@ -13374,14 +13382,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13408,18 +13416,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 
s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13441,18 +13449,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i16_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i16_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13474,19 +13482,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 0x20001 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 0x40003 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo +; GFX11-NEXT: 
s_mov_b32 s4, 0x20001 +; GFX11-NEXT: s_mov_b32 s5, 0x40003 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13508,18 +13515,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i16_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i16_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13545,16 +13552,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: 
v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -13577,14 +13584,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2f16_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2f16_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -13607,15 +13614,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, 
external_void_func_v2f16_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -13638,14 +13645,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 3 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2f16_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2f16_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 3 @@ -13671,17 +13678,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; 
GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13706,14 +13713,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13738,15 +13745,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 +; GFX11-NEXT: v_writelane_b32 v40, s30, 2 +; GFX11-NEXT: v_writelane_b32 v40, s31, 3 ; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 2 -; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13771,14 +13778,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13805,18 +13812,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 -; 
GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -13838,18 +13845,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -13871,19 +13878,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; 
GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -13905,18 +13911,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 @@ -13942,20 +13948,20 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 5 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 3 +; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -13978,20 +13984,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 5 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 4 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: s_mov_b32 s5, 4 +; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -14014,21 +14020,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 5 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 3 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 4 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 5 ; GFX11-NEXT: v_writelane_b32 v40, s30, 3 ; GFX11-NEXT: v_writelane_b32 v40, s31, 4 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 3 +; GFX11-NEXT: s_mov_b32 s5, 4 +; GFX11-NEXT: s_mov_b32 s6, 5 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s30, v40, 3 +; GFX11-NEXT: v_readlane_b32 s31, v40, 4 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 @@ -14051,20 +14056,20 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -14091,22 +14096,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: 
v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14130,22 +14135,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 4 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3i32_i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: s_mov_b32 s5, 4 +; GFX10-NEXT: s_mov_b32 s6, 5 +; GFX10-NEXT: s_mov_b32 s7, 6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14169,23 +14174,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 3 ; 
GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 4 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 5 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 6 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 3 +; GFX11-NEXT: s_mov_b32 s5, 4 +; GFX11-NEXT: s_mov_b32 s6, 5 +; GFX11-NEXT: s_mov_b32 s7, 6 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14209,22 +14213,22 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3i32_i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3i32_i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: 
s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14252,19 +14256,19 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14293,14 +14297,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v4i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14329,15 +14333,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 +; GFX11-NEXT: v_writelane_b32 v40, s30, 4 +; GFX11-NEXT: v_writelane_b32 v40, s31, 5 ; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 4 -; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14366,14 +14370,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v4i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14402,22 +14406,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 6 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -14441,22 +14445,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 6 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo ; 
GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 3 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -14480,23 +14484,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 6 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 3 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: v_writelane_b32 v40, s30, 4 ; GFX11-NEXT: v_writelane_b32 v40, s31, 5 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_mov_b32 s5, 2 +; GFX11-NEXT: s_mov_b32 s6, 3 +; 
GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s30, v40, 4 +; GFX11-NEXT: v_readlane_b32 s31, v40, 5 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 @@ -14520,22 +14523,22 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -14563,12 +14566,14 @@ define amdgpu_gfx void 
@test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 7 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s30, 5 +; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 @@ -14576,11 +14581,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 6 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -14605,24 +14608,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 7 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v5i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-NEXT: v_writelane_b32 v40, s31, 6 +; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_v5i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v5i32_inreg@abs32@lo ; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 -; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 -; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 5 -; GFX10-NEXT: v_writelane_b32 v40, s30, 5 -; GFX10-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -14647,25 +14650,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 7 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 3 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 5 ; GFX11-NEXT: v_writelane_b32 v40, s30, 5 ; GFX11-NEXT: v_writelane_b32 v40, s31, 6 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_mov_b32 s5, 2 +; GFX11-NEXT: s_mov_b32 s6, 3 +; GFX11-NEXT: s_mov_b32 s7, 4 +; GFX11-NEXT: s_mov_b32 s8, 5 ; GFX11-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s30, v40, 5 +; GFX11-NEXT: v_readlane_b32 s31, v40, 6 ; GFX11-NEXT: v_readlane_b32 s8, v40, 4 ; GFX11-NEXT: v_readlane_b32 s7, v40, 3 ; GFX11-NEXT: v_readlane_b32 s6, v40, 2 @@ -14690,24 +14692,24 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 7 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v5i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v5i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -14736,25 
+14738,25 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -14782,7 +14784,6 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -14792,15 +14793,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-NEXT: 
v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 8 -; GFX10-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -14828,7 +14830,6 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -14838,16 +14839,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 +; GFX11-NEXT: v_writelane_b32 v40, s30, 8 +; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b256 s[4:11], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 8 -; GFX11-NEXT: v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: 
v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -14875,7 +14877,6 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -14885,15 +14886,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -14927,6 +14929,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; 
GFX9-NEXT: v_writelane_b32 v40, s34, 10 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -14936,6 +14939,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s4, 1 @@ -14946,11 +14950,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s9, 6 ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -14978,30 +14980,30 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 10 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-NEXT: s_mov_b32 s6, 3 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-NEXT: s_mov_b32 s8, 5 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-NEXT: 
s_mov_b32 s9, 6 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-NEXT: s_mov_b32 s10, 7 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8i32_inreg@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8i32_inreg@abs32@lo +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: s_mov_b32 s7, 4 +; GFX10-NEXT: s_mov_b32 s8, 5 +; GFX10-NEXT: s_mov_b32 s9, 6 +; GFX10-NEXT: s_mov_b32 s10, 7 +; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -15029,31 +15031,30 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 10 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, 1 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, 2 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 -; GFX11-NEXT: s_mov_b32 s6, 3 ; GFX11-NEXT: v_writelane_b32 v40, s7, 3 -; GFX11-NEXT: s_mov_b32 s7, 4 ; GFX11-NEXT: v_writelane_b32 v40, s8, 4 -; GFX11-NEXT: s_mov_b32 s8, 5 ; GFX11-NEXT: v_writelane_b32 v40, s9, 5 -; GFX11-NEXT: s_mov_b32 s9, 6 ; GFX11-NEXT: v_writelane_b32 v40, s10, 6 -; GFX11-NEXT: s_mov_b32 s10, 7 ; GFX11-NEXT: v_writelane_b32 v40, s11, 7 -; GFX11-NEXT: s_mov_b32 s11, 8 ; GFX11-NEXT: v_writelane_b32 v40, s30, 8 ; GFX11-NEXT: 
v_writelane_b32 v40, s31, 9 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo +; GFX11-NEXT: s_mov_b32 s4, 1 +; GFX11-NEXT: s_mov_b32 s5, 2 +; GFX11-NEXT: s_mov_b32 s6, 3 +; GFX11-NEXT: s_mov_b32 s7, 4 +; GFX11-NEXT: s_mov_b32 s8, 5 +; GFX11-NEXT: s_mov_b32 s9, 6 +; GFX11-NEXT: s_mov_b32 s10, 7 +; GFX11-NEXT: s_mov_b32 s11, 8 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s30, v40, 8 +; GFX11-NEXT: v_readlane_b32 s31, v40, 9 ; GFX11-NEXT: v_readlane_b32 s11, v40, 7 ; GFX11-NEXT: v_readlane_b32 s10, v40, 6 ; GFX11-NEXT: v_readlane_b32 s9, v40, 5 @@ -15081,30 +15082,30 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 10 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 -; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s31, 9 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8i32_inreg@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8i32_inreg@abs32@lo +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 +; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6 +; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 +; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -15136,6 +15137,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 18 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15147,22 +15149,21 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s12, 8 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 ; GFX9-NEXT: 
s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13 @@ -15198,7 +15199,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 18 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -15216,15 +15216,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13 @@ -15260,7 +15261,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 
off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 18 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 @@ -15278,16 +15278,17 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; GFX11-NEXT: v_writelane_b32 v40, s30, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 17 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 16 -; GFX11-NEXT: v_writelane_b32 v40, s31, 17 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 17 ; GFX11-NEXT: v_readlane_b32 s30, v40, 16 +; GFX11-NEXT: v_readlane_b32 s31, v40, 17 ; GFX11-NEXT: v_readlane_b32 s19, v40, 15 ; GFX11-NEXT: v_readlane_b32 s18, v40, 14 ; GFX11-NEXT: v_readlane_b32 s17, v40, 13 @@ -15323,7 +15324,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 18 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 @@ -15341,15 +15341,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 @@ -15391,6 +15392,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 28 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15407,23 +15409,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, 
s25, 21 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: v_mov_b32_e32 v3, s49 @@ -15432,11 +15437,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 ; GFX9-NEXT: s_mov_b32 s22, s38 @@ -15447,11 +15449,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 
; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -15497,7 +15498,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -15515,29 +15515,40 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 -; GFX10-NEXT: 
v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 ; GFX10-NEXT: s_mov_b32 s24, s40 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -15546,19 +15557,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -15604,10 +15605,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 28 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s2, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: 
v_writelane_b32 v40, s6, 2 @@ -15624,44 +15622,45 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; GFX11-NEXT: v_writelane_b32 v40, s20, 16 +; GFX11-NEXT: v_writelane_b32 v40, s21, 17 +; GFX11-NEXT: v_writelane_b32 v40, s22, 18 +; GFX11-NEXT: v_writelane_b32 v40, s23, 19 +; GFX11-NEXT: v_writelane_b32 v40, s24, 20 +; GFX11-NEXT: v_writelane_b32 v40, s25, 21 +; GFX11-NEXT: v_writelane_b32 v40, s26, 22 +; GFX11-NEXT: v_writelane_b32 v40, s27, 23 +; GFX11-NEXT: v_writelane_b32 v40, s28, 24 +; GFX11-NEXT: v_writelane_b32 v40, s29, 25 +; GFX11-NEXT: v_writelane_b32 v40, s30, 26 +; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_add_i32 s2, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b512 s[36:51], s[0:1], 0x40 ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s20, 16 -; GFX11-NEXT: v_writelane_b32 v40, s21, 17 -; GFX11-NEXT: v_writelane_b32 v40, s22, 18 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v5, s51 -; GFX11-NEXT: v_writelane_b32 v40, s23, 19 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v1, s47 ; GFX11-NEXT: v_dual_mov_b32 v2, s48 :: v_dual_mov_b32 v3, s49 -; GFX11-NEXT: v_writelane_b32 v40, s24, 20 ; GFX11-NEXT: s_mov_b32 s20, s36 ; GFX11-NEXT: s_mov_b32 s21, s37 ; GFX11-NEXT: s_mov_b32 s22, s38 ; GFX11-NEXT: s_mov_b32 s23, s39 -; GFX11-NEXT: v_writelane_b32 v40, s25, 21 ; GFX11-NEXT: s_mov_b32 s24, s40 ; GFX11-NEXT: s_mov_b32 s25, s41 -; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: v_writelane_b32 v40, s26, 22 ; 
GFX11-NEXT: s_mov_b32 s26, s42 -; GFX11-NEXT: v_writelane_b32 v40, s27, 23 ; GFX11-NEXT: s_mov_b32 s27, s43 -; GFX11-NEXT: v_writelane_b32 v40, s28, 24 ; GFX11-NEXT: s_mov_b32 s28, s44 -; GFX11-NEXT: v_writelane_b32 v40, s29, 25 ; GFX11-NEXT: s_mov_b32 s29, s45 -; GFX11-NEXT: v_writelane_b32 v40, s30, 26 -; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: scratch_store_b64 off, v[4:5], s2 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26 +; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23 @@ -15707,9 +15706,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -15726,46 +15723,48 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 +; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s2 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -15817,6 +15816,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 28 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 @@ -15832,41 +15832,41 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: v_writelane_b32 v40, s25, 21 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s52, 
s[34:35], 0x0 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 ; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 ; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s24, 20 -; GFX9-NEXT: v_writelane_b32 v40, s25, 21 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s52 -; GFX9-NEXT: v_writelane_b32 v40, s27, 23 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, s49 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo ; GFX9-NEXT: s_mov_b32 s20, s36 ; GFX9-NEXT: s_mov_b32 s21, s37 @@ -15878,11 +15878,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; 
GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -15928,7 +15927,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 28 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -15946,6 +15944,19 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 @@ -15955,46 +15966,34 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s52 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; 
GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s24, s40 ; GFX10-NEXT: s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -16040,10 +16039,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: 
v_writelane_b32 v40, s0, 28 -; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s3, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 ; GFX11-NEXT: v_writelane_b32 v40, s6, 2 @@ -16060,6 +16056,20 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s17, 13 ; GFX11-NEXT: v_writelane_b32 v40, s18, 14 ; GFX11-NEXT: v_writelane_b32 v40, s19, 15 +; GFX11-NEXT: v_writelane_b32 v40, s20, 16 +; GFX11-NEXT: v_writelane_b32 v40, s21, 17 +; GFX11-NEXT: v_writelane_b32 v40, s22, 18 +; GFX11-NEXT: v_writelane_b32 v40, s23, 19 +; GFX11-NEXT: v_writelane_b32 v40, s24, 20 +; GFX11-NEXT: v_writelane_b32 v40, s25, 21 +; GFX11-NEXT: v_writelane_b32 v40, s26, 22 +; GFX11-NEXT: v_writelane_b32 v40, s27, 23 +; GFX11-NEXT: v_writelane_b32 v40, s28, 24 +; GFX11-NEXT: v_writelane_b32 v40, s29, 25 +; GFX11-NEXT: v_writelane_b32 v40, s30, 26 +; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_add_i32 s3, s32, 16 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x2 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0 @@ -16067,41 +16077,28 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX11-NEXT: s_load_b512 s[4:19], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s20, 16 -; GFX11-NEXT: v_writelane_b32 v40, s21, 17 -; GFX11-NEXT: v_writelane_b32 v40, s22, 18 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v5, s51 -; GFX11-NEXT: v_writelane_b32 v40, s23, 19 ; GFX11-NEXT: v_dual_mov_b32 v4, s50 :: v_dual_mov_b32 v1, s47 ; GFX11-NEXT: v_dual_mov_b32 v0, s46 :: v_dual_mov_b32 v3, s49 -; GFX11-NEXT: v_writelane_b32 
v40, s24, 20 ; GFX11-NEXT: v_mov_b32_e32 v2, s48 ; GFX11-NEXT: s_add_i32 s2, s32, 24 ; GFX11-NEXT: s_mov_b32 s20, s36 ; GFX11-NEXT: s_mov_b32 s21, s37 -; GFX11-NEXT: v_writelane_b32 v40, s25, 21 ; GFX11-NEXT: s_mov_b32 s22, s38 ; GFX11-NEXT: s_mov_b32 s23, s39 ; GFX11-NEXT: s_mov_b32 s24, s40 ; GFX11-NEXT: s_mov_b32 s25, s41 -; GFX11-NEXT: v_writelane_b32 v40, s26, 22 ; GFX11-NEXT: s_mov_b32 s26, s42 -; GFX11-NEXT: scratch_store_b32 off, v6, s2 -; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 -; GFX11-NEXT: v_writelane_b32 v40, s27, 23 ; GFX11-NEXT: s_mov_b32 s27, s43 -; GFX11-NEXT: v_writelane_b32 v40, s28, 24 ; GFX11-NEXT: s_mov_b32 s28, s44 -; GFX11-NEXT: v_writelane_b32 v40, s29, 25 ; GFX11-NEXT: s_mov_b32 s29, s45 -; GFX11-NEXT: v_writelane_b32 v40, s30, 26 -; GFX11-NEXT: v_writelane_b32 v40, s31, 27 +; GFX11-NEXT: scratch_store_b32 off, v6, s2 +; GFX11-NEXT: scratch_store_b64 off, v[4:5], s3 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s30, v40, 26 +; GFX11-NEXT: v_readlane_b32 s31, v40, 27 ; GFX11-NEXT: v_readlane_b32 s29, v40, 25 ; GFX11-NEXT: v_readlane_b32 s28, v40, 24 ; GFX11-NEXT: v_readlane_b32 s27, v40, 23 @@ -16147,9 +16144,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 28 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 @@ -16166,6 +16161,20 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 
{ ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-SCRATCH-NEXT: s_add_i32 s3, s32, 16 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x2 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -16175,43 +16184,31 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v32i32_i32_inreg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v32i32_i32_inreg@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-SCRATCH-NEXT: s_add_i32 s2, s32, 24 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s2 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s3 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -16263,21 +16260,21 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; 
GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX9-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -16296,22 +16293,22 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: s_mov_b32 s35, stack_passed_f64_arg@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, stack_passed_f64_arg@abs32@lo ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16330,18 +16327,18 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: scratch_load_b64 v[32:33], off, s33 ; GFX11-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16360,18 +16357,18 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s33 offset:8 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 -; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, stack_passed_f64_arg@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, stack_passed_f64_arg@abs32@lo -; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16395,16 +16392,17 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 12 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo @@ -16440,10 +16438,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v29, 9 ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: 
s_or_saveexec_b64 s[36:37], -1 @@ -16463,12 +16460,14 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 12 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 ; GFX10-NEXT: v_mov_b32_e32 v2, 14 -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 @@ -16477,7 +16476,6 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: v_mov_b32_e32 v6, 2 ; GFX10-NEXT: v_mov_b32_e32 v7, 2 @@ -16507,10 +16505,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_12xv3i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_12xv3i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16530,15 +16527,16 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; 
GFX11-NEXT: v_dual_mov_b32 v0, 12 :: v_dual_mov_b32 v1, 13 ; GFX11-NEXT: v_dual_mov_b32 v2, 14 :: v_dual_mov_b32 v3, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 1 +; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1 -; GFX11-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, 2 ; GFX11-NEXT: v_dual_mov_b32 v8, 2 :: v_dual_mov_b32 v9, 3 ; GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_mov_b32 v11, 3 ; GFX11-NEXT: v_dual_mov_b32 v12, 4 :: v_dual_mov_b32 v13, 4 @@ -16553,11 +16551,10 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v30, 10 :: v_dual_mov_b32 v31, 11 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16577,21 +16574,22 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 12 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 13 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 14 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 1 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2 +; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, 1 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 2 -; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v8, 2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v9, 3 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v10, 3 @@ -16618,10 +16616,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_12xv3i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_12xv3i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16657,7 +16654,10 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 9 @@ -16671,10 +16671,8 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: 
v_mov_b32_e32 v0, 14 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo @@ -16710,10 +16708,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v29, 5 ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -16732,20 +16729,22 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-NEXT: v_mov_b32_e32 v2, 10 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_mov_b32_e32 v3, 14 +; GFX10-NEXT: v_mov_b32_e32 v4, 15 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 11 ; GFX10-NEXT: v_mov_b32_e32 v1, 12 ; GFX10-NEXT: v_mov_b32_e32 v2, 13 -; GFX10-NEXT: v_mov_b32_e32 v3, 14 -; GFX10-NEXT: v_mov_b32_e32 v4, 15 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 @@ -16756,7 +16755,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, 1 ; GFX10-NEXT: v_mov_b32_e32 v6, 1 ; GFX10-NEXT: v_mov_b32_e32 v7, 1 ; GFX10-NEXT: v_mov_b32_e32 v8, 1 @@ -16785,10 +16783,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5i32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5i32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -16808,12 +16805,13 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_dual_mov_b32 v0, 8 :: v_dual_mov_b32 v1, 9 ; GFX11-NEXT: v_dual_mov_b32 v2, 10 :: v_dual_mov_b32 v3, 11 ; GFX11-NEXT: v_dual_mov_b32 v4, 12 :: v_dual_mov_b32 v5, 13 ; GFX11-NEXT: v_dual_mov_b32 v6, 14 :: v_dual_mov_b32 v7, 15 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 @@ -16835,11 +16833,10 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX11-NEXT: v_dual_mov_b32 v30, 6 :: v_dual_mov_b32 v31, 7 ; GFX11-NEXT: 
s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -16859,6 +16856,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 10 @@ -16867,8 +16867,6 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 13 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 14 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 15 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0 @@ -16906,10 +16904,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5i32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5i32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -16941,7 +16938,10 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000 @@ -16955,10 +16955,8 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo @@ -16994,10 +16992,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17016,20 +17013,22 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, 
s35 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 +; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41300000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41400000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41500000 -; GFX10-NEXT: v_mov_b32_e32 v3, 0x41600000 -; GFX10-NEXT: v_mov_b32_e32 v4, 0x41700000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 @@ -17040,7 +17039,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 -; GFX10-NEXT: v_mov_b32_e32 v5, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v6, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v7, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v8, 1.0 @@ -17069,10 +17067,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_8xv5f32@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_8xv5f32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s35, -1 @@ -17092,6 +17089,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 +; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s31, 1 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX11-NEXT: v_mov_b32_e32 v2, 0x41200000 @@ -17100,8 +17100,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: v_mov_b32_e32 v5, 0x41500000 ; GFX11-NEXT: v_mov_b32_e32 v6, 0x41600000 ; GFX11-NEXT: v_mov_b32_e32 v7, 0x41700000 -; GFX11-NEXT: s_add_i32 s32, s32, 16 -; GFX11-NEXT: v_writelane_b32 v40, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 ; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 @@ -17124,11 +17122,10 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX11-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17148,6 +17145,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-SCRATCH-NEXT: 
v_mov_b32_e32 v1, 0x41100000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, 0x41200000 @@ -17156,8 +17156,6 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, 0x41500000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, 0x41600000 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v7, 0x41700000 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s0, s32, 16 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 ; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[4:7], s0 @@ -17195,10 +17193,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_8xv5f32@abs32@hi ; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_8xv5f32@abs32@lo -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17232,13 +17229,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17258,14 +17255,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17285,15 +17282,15 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -17313,14 +17310,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16(i16 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17346,13 +17343,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17372,14 +17369,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 
%arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17399,15 +17396,15 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17427,14 +17424,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16(i16 
%arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17460,13 +17457,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17486,14 +17483,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: 
v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17513,15 +17510,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17541,14 +17538,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16(i32 %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17574,13 +17571,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17600,14 +17597,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; 
GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17627,15 +17624,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17655,14 +17652,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16(<3 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 
s1, external_void_func_v3bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17688,13 +17685,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17714,14 +17711,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo -; GFX10-NEXT: 
s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17741,15 +17738,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17769,14 +17766,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16(<4 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, 
external_void_func_v4bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17802,13 +17799,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17828,14 +17825,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: 
s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17855,15 +17852,15 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17883,14 +17880,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16(<8 x i16> %arg) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -17916,13 +17913,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -17942,14 +17939,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -17969,15 +17966,15 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -17997,14 +17994,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16(<16 x i16> %arg) #0 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; 
GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18030,13 +18027,13 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18056,14 +18053,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_bf16@abs32@hi +; GFX10-NEXT: 
s_mov_b32 s34, external_void_func_bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18083,15 +18080,15 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18111,14 +18108,14 @@ define amdgpu_gfx void @test_call_external_void_func_bf16_inreg(i16 inreg %arg) ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18144,13 +18141,13 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18170,14 +18167,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v1bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v1bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], 
s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18197,15 +18194,15 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18225,14 +18222,14 @@ define amdgpu_gfx void @test_call_external_void_func_v1bf16_inreg(i16 inreg %arg ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v1bf16@abs32@hi +; 
GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v1bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18258,13 +18255,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18284,14 +18281,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v2bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v2bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18311,15 +18308,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18339,14 +18336,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2bf16_inreg(i32 inreg %arg ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v2bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v2bf16@abs32@lo ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18372,13 +18369,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18398,14 +18395,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v3bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v3bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; 
GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18425,15 +18422,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18453,14 +18450,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3bf16_inreg(<3 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v3bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v3bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18486,13 +18483,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18512,14 +18509,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v4bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v4bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; 
GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18539,15 +18536,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18567,14 +18564,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4bf16_inreg(<4 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v4bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v4bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18600,13 +18597,13 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18626,14 +18623,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v8bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v8bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18653,15 +18650,15 @@ define amdgpu_gfx 
void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18681,14 +18678,14 @@ define amdgpu_gfx void @test_call_external_void_func_v8bf16_inreg(<8 x i16> inre ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v8bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v8bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 
s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 @@ -18714,13 +18711,13 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -18740,14 +18737,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_v16bf16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_v16bf16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -18767,15 +18764,15 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; 
GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -18795,14 +18792,14 @@ define amdgpu_gfx void @test_call_external_void_func_v16bf16_inreg(<16 x i16> in ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s0, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi -; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s1, external_void_func_v16bf16@abs32@hi +; GFX10-SCRATCH-NEXT: s_mov_b32 s0, external_void_func_v16bf16@abs32@lo ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s32, s33 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 2 
; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 124de7e00f020..576b481ca4ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -15,19 +15,19 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 4 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 @@ -51,17 +51,17 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: v_writelane_b32 v40, s34, 4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX10-NEXT: s_swappc_b64 
s[30:31], s[4:5] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 @@ -85,18 +85,18 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX11-NEXT: v_writelane_b32 v40, s0, 4 ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 -; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo ; GFX11-NEXT: v_writelane_b32 v40, s5, 1 -; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi ; GFX11-NEXT: v_writelane_b32 v40, s30, 2 ; GFX11-NEXT: v_writelane_b32 v40, s31, 3 +; GFX11-NEXT: s_mov_b32 s5, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s4, external_void_func_void@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s30, v40, 2 +; GFX11-NEXT: v_readlane_b32 s31, v40, 3 ; GFX11-NEXT: v_readlane_b32 s5, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 @@ -130,8 +130,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s31, v0, 3 ; GFX9-NEXT: v_readlane_b32 s30, v0, 2 +; GFX9-NEXT: v_readlane_b32 s31, v0, 3 ; GFX9-NEXT: v_readlane_b32 s29, v0, 1 ; GFX9-NEXT: v_readlane_b32 s28, v0, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 @@ -157,8 +157,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s31, 
v0, 3 ; GFX10-NEXT: v_readlane_b32 s30, v0, 2 +; GFX10-NEXT: v_readlane_b32 s31, v0, 3 ; GFX10-NEXT: v_readlane_b32 s29, v0, 1 ; GFX10-NEXT: v_readlane_b32 s28, v0, 0 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 @@ -185,8 +185,8 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { ; GFX11-NEXT: ; clobber ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v0, 3 ; GFX11-NEXT: v_readlane_b32 s30, v0, 2 +; GFX11-NEXT: v_readlane_b32 s31, v0, 3 ; GFX11-NEXT: v_readlane_b32 s29, v0, 1 ; GFX11-NEXT: v_readlane_b32 s28, v0, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -209,12 +209,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND @@ -224,8 +224,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -246,12 +246,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, 
external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s31 ; GFX10-NEXT: ;;#ASMEND @@ -261,8 +261,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -283,12 +283,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: v_writelane_b32 v40, s30, 1 ; GFX11-NEXT: v_writelane_b32 v40, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s31 ; GFX11-NEXT: ;;#ASMEND @@ -298,8 +298,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s31 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s30, v40, 1 +; GFX11-NEXT: v_readlane_b32 s31, v40, 2 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: 
v_readlane_b32 s0, v40, 3 @@ -325,12 +325,12 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 2 -; GFX9-NEXT: v_writelane_b32 v41, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: v_writelane_b32 v41, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND @@ -341,8 +341,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -362,24 +362,24 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: v_writelane_b32 v41, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: 
;;#ASMSTART ; GFX10-NEXT: ; def v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v40, v31 -; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v40 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -399,25 +399,25 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v41, s30, 0 +; GFX11-NEXT: v_writelane_b32 v41, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v31 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v40, v31 -; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v31, v40 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v31 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: v_readlane_b32 s30, v41, 0 +; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 2 ; GFX11-NEXT: 
s_or_saveexec_b32 s1, -1 @@ -443,23 +443,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -480,23 +480,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 +; GFX10-NEXT: 
v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -517,24 +517,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s33 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, s33 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_mov_b32 s33, s4 +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s33 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -560,23 +559,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; 
def s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, s34 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s34, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -597,23 +596,23 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, s34 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s34, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -634,24 +633,23 @@ define amdgpu_gfx void 
@test_call_void_func_void_preserves_s34(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s34 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, s34 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: s_mov_b32 s34, s4 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s34 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -677,12 +675,12 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 2 -; GFX9-NEXT: v_writelane_b32 v41, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v41, s30, 0 ; GFX9-NEXT: v_writelane_b32 v41, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; 
GFX9-NEXT: ;;#ASMEND @@ -691,8 +689,8 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -712,22 +710,22 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: v_writelane_b32 v41, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v41, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -747,22 +745,22 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, 
external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v41, s30, 0 +; GFX11-NEXT: v_writelane_b32 v41, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def v40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v41, s31, 1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use v40 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: v_readlane_b32 s30, v41, 0 +; GFX11-NEXT: v_readlane_b32 s31, v41, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -844,13 +842,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -870,14 +868,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: 
s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s33@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s33@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -897,15 +895,15 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s33@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s33@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -929,13 +927,13 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_mov_b32 
s35, void_func_void_clobber_s34@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -955,14 +953,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, void_func_void_clobber_s34@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, void_func_void_clobber_s34@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s35, -1 @@ -982,15 +980,15 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 2 -; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 +; GFX11-NEXT: s_add_i32 s32, s32, 16 ; 
GFX11-NEXT: v_writelane_b32 v40, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, void_func_void_clobber_s34@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, void_func_void_clobber_s34@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 +; GFX11-NEXT: v_readlane_b32 s31, v40, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -1013,22 +1011,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v40, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v40, 3 @@ -1049,22 +1047,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v40, s34, 3 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; 
GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, s40 -; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v40, 3 @@ -1085,23 +1083,22 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v40, s0, 3 +; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v40, s4, 0 +; GFX11-NEXT: v_writelane_b32 v40, s30, 1 +; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo -; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v40, s4, 0 ; GFX11-NEXT: s_mov_b32 s4, s40 -; GFX11-NEXT: v_writelane_b32 v40, s30, 1 -; GFX11-NEXT: v_writelane_b32 v40, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s4 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s31, v40, 2 -; GFX11-NEXT: v_readlane_b32 s30, v40, 1 ; GFX11-NEXT: v_readlane_b32 s4, v40, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v40, 3 @@ -1127,13 +1124,13 @@ define amdgpu_gfx void 
@callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-NEXT: v_writelane_b32 v41, s34, 3 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_writelane_b32 v41, s4, 0 ; GFX9-NEXT: v_writelane_b32 v41, s30, 1 +; GFX9-NEXT: v_writelane_b32 v41, s31, 2 ; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: v_writelane_b32 v41, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND @@ -1150,8 +1147,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 2 ; GFX9-NEXT: v_readlane_b32 s30, v41, 1 +; GFX9-NEXT: v_readlane_b32 s31, v41, 2 ; GFX9-NEXT: v_readlane_b32 s4, v41, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s34, v41, 3 @@ -1172,11 +1169,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s35 ; GFX10-NEXT: v_writelane_b32 v41, s34, 3 -; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: v_writelane_b32 v41, s4, 0 +; GFX10-NEXT: v_writelane_b32 v41, s30, 1 +; GFX10-NEXT: v_writelane_b32 v41, s31, 2 +; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ 
-1185,8 +1184,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v40, v32 -; GFX10-NEXT: v_writelane_b32 v41, s30, 1 -; GFX10-NEXT: v_writelane_b32 v41, s31, 2 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 @@ -1195,8 +1192,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 2 ; GFX10-NEXT: v_readlane_b32 s30, v41, 1 +; GFX10-NEXT: v_readlane_b32 s31, v41, 2 ; GFX10-NEXT: v_readlane_b32 s4, v41, 0 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s34, v41, 3 @@ -1217,11 +1214,13 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: v_writelane_b32 v41, s0, 3 -; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX11-NEXT: v_writelane_b32 v41, s4, 0 +; GFX11-NEXT: v_writelane_b32 v41, s30, 1 +; GFX11-NEXT: v_writelane_b32 v41, s31, 2 +; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s40 ; GFX11-NEXT: ;;#ASMEND @@ -1230,8 +1229,6 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: ; def v32 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_mov_b32_e32 v40, v32 -; GFX11-NEXT: v_writelane_b32 v41, s30, 1 -; GFX11-NEXT: v_writelane_b32 v41, s31, 2 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s4 @@ -1240,8 +1237,8 @@ define amdgpu_gfx void 
@callee_saved_sgpr_vgpr_kernel() #1 { ; GFX11-NEXT: ; use v40 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GFX11-NEXT: v_readlane_b32 s31, v41, 2 ; GFX11-NEXT: v_readlane_b32 s30, v41, 1 +; GFX11-NEXT: v_readlane_b32 s31, v41, 2 ; GFX11-NEXT: v_readlane_b32 s4, v41, 0 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v41, 3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index b750d28ffa7d3..891c6e37185d3 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -29,13 +29,13 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_i1@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -54,13 +54,13 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_i1@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_i1@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i1@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 
s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -79,14 +79,14 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_i1@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_i1@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_i1@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i1@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -131,13 +131,13 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_i16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -156,13 +156,13 @@ define amdgpu_gfx 
void @call_i16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_i16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_i16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_i16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -181,14 +181,14 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_i16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_i16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_i16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_i16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -227,13 +227,13 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX9-NEXT: s_addk_i32 
s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -252,13 +252,13 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_2xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_2xi16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -277,14 +277,14 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v1, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v1, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_2xi16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_2xi16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v1, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_2xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_2xi16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: v_readlane_b32 s30, v1, 
0 +; GFX11-NEXT: v_readlane_b32 s31, v1, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v1, off, s33 ; 4-byte Folded Reload @@ -331,13 +331,13 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -356,13 +356,13 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_3xi16@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_3xi16@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -381,14 +381,14 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX11-NEXT: scratch_store_b32 off, v2, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 -; 
GFX11-NEXT: s_mov_b32 s1, return_3xi16@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_3xi16@abs32@lo ; GFX11-NEXT: s_add_i32 s32, s32, 16 ; GFX11-NEXT: v_writelane_b32 v2, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_3xi16@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_3xi16@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v2, 1 ; GFX11-NEXT: v_readlane_b32 s30, v2, 0 +; GFX11-NEXT: v_readlane_b32 s31, v2, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s33 ; 4-byte Folded Reload @@ -680,9 +680,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v100, s30, 0 -; GFX9-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi -; GFX9-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX9-NEXT: s_addk_i32 s32, 0x2400 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill @@ -716,7 +713,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v100, s30, 0 ; GFX9-NEXT: v_writelane_b32 v100, s31, 1 +; GFX9-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_dword v95, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:4 
; 4-byte Folded Reload @@ -750,8 +750,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v100, 1 ; GFX9-NEXT: v_readlane_b32 s30, v100, 0 +; GFX9-NEXT: v_readlane_b32 s31, v100, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -769,9 +769,6 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v100, s30, 0 -; GFX10-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi -; GFX10-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX10-NEXT: s_addk_i32 s32, 0x1200 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill @@ -805,7 +802,10 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v95, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v100, s30, 0 ; GFX10-NEXT: v_writelane_b32 v100, s31, 1 +; GFX10-NEXT: s_mov_b32 s35, return_100xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s34, return_100xi32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1f ; GFX10-NEXT: buffer_load_dword v95, off, s[0:3], s33 @@ -840,8 +840,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:116 ; 
GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:120 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:124 -; GFX10-NEXT: v_readlane_b32 s31, v100, 1 ; GFX10-NEXT: v_readlane_b32 s30, v100, 0 +; GFX10-NEXT: v_readlane_b32 s31, v100, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: s_xor_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v100, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -859,44 +859,76 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v100, s33 offset:128 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v100, s30, 0 -; GFX11-NEXT: s_mov_b32 s1, return_100xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_100xi32@abs32@lo ; GFX11-NEXT: s_addk_i32 s32, 0x90 ; GFX11-NEXT: s_clause 0x1f ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:124 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:120 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:116 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:112 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:108 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:104 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:100 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:96 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:92 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:88 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:84 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s33 offset:80 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:76 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s33 offset:72 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s33 offset:68 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s33 offset:64 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v72, s33 offset:60 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v73, s33 offset:56 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v74, s33 offset:52 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v75, s33 offset:48 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v76, s33 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v77, s33 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v78, s33 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v79, s33 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v88, s33 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v89, s33 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v90, s33 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v91, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v92, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v93, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v94, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v95, s33 +; GFX11-NEXT: v_writelane_b32 v100, s30, 0 ; GFX11-NEXT: v_writelane_b32 v100, s31, 1 +; GFX11-NEXT: s_mov_b32 s1, return_100xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_100xi32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: 
s_clause 0x1f ; GFX11-NEXT: scratch_load_b32 v95, off, s33 @@ -931,8 +963,8 @@ define amdgpu_gfx void @call_100xi32() #0 { ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:116 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:120 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:124 -; GFX11-NEXT: v_readlane_b32 s31, v100, 1 ; GFX11-NEXT: v_readlane_b32 s30, v100, 0 +; GFX11-NEXT: v_readlane_b32 s31, v100, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v100, off, s33 offset:128 ; 4-byte Folded Reload @@ -2142,17 +2174,17 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-NEXT: s_xor_saveexec_b64 s[36:37], -1 ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi -; GFX9-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo -; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: s_mov_b32 s38, s34 ; GFX9-NEXT: s_mov_b32 s34, s32 +; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 ; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi +; GFX9-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo +; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s38 ; GFX9-NEXT: s_xor_saveexec_b64 s[36:37], -1 @@ -2172,17 +2204,17 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:2048 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s36 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi -; GFX10-NEXT: 
s_mov_b32 s36, return_512xi32@abs32@lo ; GFX10-NEXT: s_mov_b32 s38, s34 ; GFX10-NEXT: s_mov_b32 s34, s32 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: v_writelane_b32 v2, s31, 1 +; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_mov_b32 s37, return_512xi32@abs32@hi +; GFX10-NEXT: s_mov_b32 s36, return_512xi32@abs32@lo ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: s_mov_b32 s34, s38 ; GFX10-NEXT: s_xor_saveexec_b32 s36, -1 @@ -2203,18 +2235,18 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v5, s33 offset:2048 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v5, s30, 0 -; GFX11-NEXT: v_mov_b32_e32 v0, s33 -; GFX11-NEXT: s_mov_b32 s1, return_512xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s0, return_512xi32@abs32@lo ; GFX11-NEXT: s_mov_b32 s36, s34 ; GFX11-NEXT: s_mov_b32 s34, s32 +; GFX11-NEXT: v_writelane_b32 v5, s30, 0 ; GFX11-NEXT: s_addk_i32 s32, 0x1800 ; GFX11-NEXT: v_writelane_b32 v5, s31, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, s33 +; GFX11-NEXT: s_mov_b32 s1, return_512xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_512xi32@abs32@lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v5, 1 ; GFX11-NEXT: v_readlane_b32 s30, v5, 0 +; GFX11-NEXT: v_readlane_b32 s31, v5, 1 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s36 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 @@ -2520,17 +2552,29 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212 +; 
GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:204 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:200 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:196 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:192 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:188 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:184 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:180 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:176 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 ; GFX11-NEXT: s_clause 0x11 ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 @@ -2640,7 +2684,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: s_mov_b32 s38, s34 ; GFX9-NEXT: s_mov_b32 s34, s32 ; GFX9-NEXT: s_add_i32 s32, s32, 0x28000 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill @@ -2656,6 +2699,9 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v63, s30, 0 +; 
GFX9-NEXT: v_writelane_b32 v63, s31, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -2698,7 +2744,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: v_writelane_b32 v63, s30, 0 ; GFX9-NEXT: s_mov_b32 s37, return_72xi32@abs32@hi ; GFX9-NEXT: s_mov_b32 s36, return_72xi32@abs32@lo ; GFX9-NEXT: v_add_u32_e32 v0, 0x200, v0 @@ -2733,7 +2778,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: v_mov_b32_e32 v29, 0 ; GFX9-NEXT: v_mov_b32_e32 v30, 0 ; GFX9-NEXT: v_mov_b32_e32 v31, 0 -; GFX9-NEXT: v_writelane_b32 v63, s31, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:636 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:640 @@ -2889,8 +2933,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v63, 1 ; GFX9-NEXT: v_readlane_b32 s30, v63, 0 +; GFX9-NEXT: v_readlane_b32 s31, v63, 1 ; GFX9-NEXT: s_mov_b32 s32, s34 ; GFX9-NEXT: s_mov_b32 s34, s38 ; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1 @@ -2910,7 +2954,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s36 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_mov_b32 s38, s34 ; GFX10-NEXT: s_mov_b32 s34, s32 ; GFX10-NEXT: s_add_i32 s32, s32, 0x14000 @@ -2929,6 +2972,13 @@ 
define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v63, s30, 0 +; GFX10-NEXT: v_writelane_b32 v63, s31, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 @@ -2971,16 +3021,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:156 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:160 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v63, s30, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 -; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_mov_b32_e32 v5, 0 ; GFX10-NEXT: v_mov_b32_e32 v6, 0 ; GFX10-NEXT: v_mov_b32_e32 v7, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x200, v0 ; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_mov_b32_e32 v10, 0 ; GFX10-NEXT: v_mov_b32_e32 v11, 0 @@ -3006,7 +3051,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: v_mov_b32_e32 v31, 0 ; GFX10-NEXT: s_mov_b32 s37, return_72xi32@abs32@hi ; GFX10-NEXT: s_mov_b32 s36, return_72xi32@abs32@lo -; GFX10-NEXT: v_writelane_b32 v63, s31, 1 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX10-NEXT: s_clause 0x28 ; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:636 @@ -3167,8 +3211,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 
offset:48 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 -; GFX10-NEXT: v_readlane_b32 s31, v63, 1 ; GFX10-NEXT: v_readlane_b32 s30, v63, 0 +; GFX10-NEXT: v_readlane_b32 s31, v63, 1 ; GFX10-NEXT: s_mov_b32 s32, s34 ; GFX10-NEXT: s_mov_b32 s34, s38 ; GFX10-NEXT: s_or_saveexec_b32 s36, -1 @@ -3189,29 +3233,42 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: s_mov_b32 s1, s0 -; GFX11-NEXT: s_mov_b32 s2, s0 -; GFX11-NEXT: s_mov_b32 s3, s0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: s_mov_b32 s36, s34 ; GFX11-NEXT: s_mov_b32 s34, s32 ; GFX11-NEXT: s_addk_i32 s32, 0xa00 ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:40 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:36 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:32 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:28 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:24 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:20 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; 
GFX11-NEXT: scratch_store_b32 off, v59, s33 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 +; GFX11-NEXT: v_writelane_b32 v60, s31, 1 +; GFX11-NEXT: s_mov_b32 s0, 0 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: s_mov_b32 s1, s0 +; GFX11-NEXT: s_mov_b32 s2, s0 +; GFX11-NEXT: s_mov_b32 s3, s0 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: s_add_i32 s0, s32, 0xa0 ; GFX11-NEXT: s_add_i32 s1, s32, 0x90 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 @@ -3232,7 +3289,6 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 ; GFX11-NEXT: s_add_i32 s2, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 @@ -3253,7 +3309,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 ; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi ; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo -; GFX11-NEXT: v_writelane_b32 v60, s31, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 @@ -3365,8 +3421,8 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:36 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:40 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:44 -; GFX11-NEXT: v_readlane_b32 s31, v60, 1 ; GFX11-NEXT: v_readlane_b32 s30, v60, 0 +; GFX11-NEXT: v_readlane_b32 s31, v60, 1 ; GFX11-NEXT: s_mov_b32 s32, s34 ; GFX11-NEXT: s_mov_b32 s34, s36 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 diff --git a/llvm/test/CodeGen/AMDGPU/global-alias.ll b/llvm/test/CodeGen/AMDGPU/global-alias.ll index d8df20eb69452..4c7bef4aec091 100644 --- 
a/llvm/test/CodeGen/AMDGPU/global-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/global-alias.ll @@ -35,8 +35,8 @@ define void @bar() { ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 10d61deed71cc..424aaaea11722 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -9,28 +9,30 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v6, s30, 0 -; CHECK-NEXT: v_writelane_b32 v6, s31, 1 -; CHECK-NEXT: v_writelane_b32 v6, s36, 2 -; CHECK-NEXT: v_writelane_b32 v6, s37, 3 -; CHECK-NEXT: v_writelane_b32 v6, s38, 4 -; CHECK-NEXT: v_writelane_b32 v6, s39, 5 -; CHECK-NEXT: v_writelane_b32 v6, s48, 6 -; CHECK-NEXT: v_writelane_b32 v6, s49, 7 -; CHECK-NEXT: v_writelane_b32 v6, s50, 8 -; CHECK-NEXT: v_writelane_b32 v6, s51, 9 -; CHECK-NEXT: v_writelane_b32 v6, s52, 10 -; CHECK-NEXT: v_writelane_b32 v6, s53, 11 -; CHECK-NEXT: v_writelane_b32 v6, s54, 12 -; CHECK-NEXT: v_writelane_b32 v6, s55, 13 -; CHECK-NEXT: v_writelane_b32 v6, s64, 14 -; CHECK-NEXT: v_writelane_b32 v6, s65, 15 -; CHECK-NEXT: v_writelane_b32 v6, s66, 16 -; CHECK-NEXT: v_writelane_b32 v6, s67, 17 -; CHECK-NEXT: v_writelane_b32 v6, s68, 18 +; CHECK-NEXT: v_writelane_b32 v6, s36, 0 +; CHECK-NEXT: v_writelane_b32 v6, s37, 1 +; CHECK-NEXT: v_writelane_b32 v6, s38, 2 +; CHECK-NEXT: 
v_writelane_b32 v6, s39, 3 +; CHECK-NEXT: v_writelane_b32 v6, s48, 4 +; CHECK-NEXT: v_writelane_b32 v6, s49, 5 +; CHECK-NEXT: v_writelane_b32 v6, s50, 6 +; CHECK-NEXT: v_writelane_b32 v6, s51, 7 +; CHECK-NEXT: v_writelane_b32 v6, s52, 8 +; CHECK-NEXT: v_writelane_b32 v6, s53, 9 +; CHECK-NEXT: v_writelane_b32 v6, s54, 10 +; CHECK-NEXT: v_writelane_b32 v6, s55, 11 +; CHECK-NEXT: v_writelane_b32 v6, s64, 12 +; CHECK-NEXT: v_writelane_b32 v6, s65, 13 +; CHECK-NEXT: v_writelane_b32 v6, s66, 14 +; CHECK-NEXT: v_writelane_b32 v6, s67, 15 +; CHECK-NEXT: v_writelane_b32 v6, s68, 16 +; CHECK-NEXT: v_writelane_b32 v6, s69, 17 +; CHECK-NEXT: v_writelane_b32 v6, s70, 18 +; CHECK-NEXT: v_writelane_b32 v6, s71, 19 +; CHECK-NEXT: v_writelane_b32 v6, s30, 20 +; CHECK-NEXT: v_writelane_b32 v6, s31, 21 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: v_writelane_b32 v6, s69, 19 ; CHECK-NEXT: s_mov_b32 s68, 0 ; CHECK-NEXT: s_mov_b32 s69, s4 ; CHECK-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0 @@ -40,11 +42,11 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130 ; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane -; CHECK-NEXT: v_writelane_b32 v6, s70, 20 -; CHECK-NEXT: v_writelane_b32 v6, s71, 21 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0 +; CHECK-NEXT: s_mov_b32 s70, s68 ; CHECK-NEXT: v_writelane_b32 v7, s8, 0 ; CHECK-NEXT: v_writelane_b32 v7, s9, 1 ; CHECK-NEXT: v_writelane_b32 v7, s10, 2 @@ -77,9 +79,7 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v7, s65, 29 ; CHECK-NEXT: v_writelane_b32 v7, s66, 30 ; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0 -; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0 ; CHECK-NEXT: s_mov_b32 s69, s68 -; CHECK-NEXT: s_mov_b32 s70, s68 ; CHECK-NEXT: s_mov_b32 s71, s68 ; CHECK-NEXT: v_writelane_b32 v7, s67, 31 
; CHECK-NEXT: image_sample_lz v3, v[1:2], s[60:67], s[68:71] dmask:0x1 @@ -225,29 +225,29 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] -; CHECK-NEXT: v_readlane_b32 s71, v6, 21 -; CHECK-NEXT: v_readlane_b32 s70, v6, 20 -; CHECK-NEXT: v_readlane_b32 s69, v6, 19 -; CHECK-NEXT: v_readlane_b32 s68, v6, 18 -; CHECK-NEXT: v_readlane_b32 s67, v6, 17 -; CHECK-NEXT: v_readlane_b32 s66, v6, 16 -; CHECK-NEXT: v_readlane_b32 s65, v6, 15 -; CHECK-NEXT: v_readlane_b32 s64, v6, 14 -; CHECK-NEXT: v_readlane_b32 s55, v6, 13 -; CHECK-NEXT: v_readlane_b32 s54, v6, 12 -; CHECK-NEXT: v_readlane_b32 s53, v6, 11 -; CHECK-NEXT: v_readlane_b32 s52, v6, 10 +; CHECK-NEXT: v_readlane_b32 s30, v6, 20 +; CHECK-NEXT: v_readlane_b32 s31, v6, 21 +; CHECK-NEXT: v_readlane_b32 s71, v6, 19 +; CHECK-NEXT: v_readlane_b32 s70, v6, 18 +; CHECK-NEXT: v_readlane_b32 s69, v6, 17 +; CHECK-NEXT: v_readlane_b32 s68, v6, 16 +; CHECK-NEXT: v_readlane_b32 s67, v6, 15 +; CHECK-NEXT: v_readlane_b32 s66, v6, 14 +; CHECK-NEXT: v_readlane_b32 s65, v6, 13 +; CHECK-NEXT: v_readlane_b32 s64, v6, 12 +; CHECK-NEXT: v_readlane_b32 s55, v6, 11 +; CHECK-NEXT: v_readlane_b32 s54, v6, 10 +; CHECK-NEXT: v_readlane_b32 s53, v6, 9 +; CHECK-NEXT: v_readlane_b32 s52, v6, 8 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_readlane_b32 s51, v6, 9 -; CHECK-NEXT: v_readlane_b32 s50, v6, 8 -; CHECK-NEXT: v_readlane_b32 s49, v6, 7 -; CHECK-NEXT: v_readlane_b32 s48, v6, 6 -; CHECK-NEXT: v_readlane_b32 s39, v6, 5 -; CHECK-NEXT: v_readlane_b32 s38, v6, 4 -; CHECK-NEXT: v_readlane_b32 s37, v6, 3 -; CHECK-NEXT: v_readlane_b32 s36, v6, 2 -; CHECK-NEXT: v_readlane_b32 s31, v6, 1 -; CHECK-NEXT: v_readlane_b32 s30, v6, 0 +; CHECK-NEXT: v_readlane_b32 s51, v6, 7 +; CHECK-NEXT: v_readlane_b32 s50, v6, 6 +; CHECK-NEXT: v_readlane_b32 s49, v6, 5 +; CHECK-NEXT: v_readlane_b32 s48, v6, 4 +; CHECK-NEXT: v_readlane_b32 s39, v6, 3 +; 
CHECK-NEXT: v_readlane_b32 s38, v6, 2 +; CHECK-NEXT: v_readlane_b32 s37, v6, 1 +; CHECK-NEXT: v_readlane_b32 s36, v6, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index a208cfdb197af..2aaaff1ecc407 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -128,24 +128,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; 
GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -175,24 +175,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 
; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -212,24 +212,24 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -259,24 +259,24 @@ define void 
@test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -300,24 +300,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: 
v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -350,24 +350,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; 
GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -387,24 +387,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; 
GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -435,24 +435,24 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, 
v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -476,24 +476,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 18 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; 
GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s30, 16 +; GCN-NEXT: v_writelane_b32 v40, s31, 17 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -525,24 +525,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[54:55] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 16 +; GCN-NEXT: v_readlane_b32 s31, v40, 17 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; 
GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 18 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -562,24 +562,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 18 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 
10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s30, 16 +; GISEL-NEXT: v_writelane_b32 v40, s31, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -611,24 +611,24 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[54:55] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 16 +; GISEL-NEXT: v_readlane_b32 s31, v40, 17 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; 
GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 18 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -653,26 +653,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 20 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 -; GCN-NEXT: v_writelane_b32 v40, s64, 16 -; GCN-NEXT: v_writelane_b32 v40, s65, 17 -; GCN-NEXT: v_writelane_b32 v40, s66, 18 -; GCN-NEXT: v_writelane_b32 v40, s67, 19 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s64, 14 +; GCN-NEXT: v_writelane_b32 v40, s65, 15 +; GCN-NEXT: v_writelane_b32 v40, s66, 16 +; GCN-NEXT: 
v_writelane_b32 v40, s67, 17 +; GCN-NEXT: v_writelane_b32 v40, s30, 18 +; GCN-NEXT: v_writelane_b32 v40, s31, 19 ; GCN-NEXT: s_mov_b32 s50, s15 ; GCN-NEXT: s_mov_b32 s51, s14 ; GCN-NEXT: s_mov_b32 s52, s13 @@ -709,26 +709,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: s_mov_b64 exec, s[64:65] ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[54:55] -; GCN-NEXT: v_readlane_b32 s67, v40, 19 -; GCN-NEXT: v_readlane_b32 s66, v40, 18 -; GCN-NEXT: v_readlane_b32 s65, v40, 17 -; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 18 +; GCN-NEXT: v_readlane_b32 s31, v40, 19 +; GCN-NEXT: v_readlane_b32 s67, v40, 17 +; GCN-NEXT: v_readlane_b32 s66, v40, 16 +; GCN-NEXT: v_readlane_b32 s65, v40, 15 +; GCN-NEXT: v_readlane_b32 s64, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; 
GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 20 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -748,26 +748,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b64 exec, s[18:19] ; GISEL-NEXT: v_writelane_b32 v40, s16, 20 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 -; GISEL-NEXT: v_writelane_b32 v40, s64, 16 -; GISEL-NEXT: v_writelane_b32 v40, s65, 17 -; GISEL-NEXT: v_writelane_b32 v40, s66, 18 -; GISEL-NEXT: v_writelane_b32 v40, s67, 19 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s64, 14 +; GISEL-NEXT: v_writelane_b32 v40, s65, 15 +; GISEL-NEXT: v_writelane_b32 v40, s66, 16 +; GISEL-NEXT: 
v_writelane_b32 v40, s67, 17 +; GISEL-NEXT: v_writelane_b32 v40, s30, 18 +; GISEL-NEXT: v_writelane_b32 v40, s31, 19 ; GISEL-NEXT: s_mov_b32 s50, s15 ; GISEL-NEXT: s_mov_b32 s51, s14 ; GISEL-NEXT: s_mov_b32 s52, s13 @@ -804,26 +804,26 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b64 exec, s[64:65] ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[54:55] -; GISEL-NEXT: v_readlane_b32 s67, v40, 19 -; GISEL-NEXT: v_readlane_b32 s66, v40, 18 -; GISEL-NEXT: v_readlane_b32 s65, v40, 17 -; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 18 +; GISEL-NEXT: v_readlane_b32 s31, v40, 19 +; GISEL-NEXT: v_readlane_b32 s67, v40, 17 +; GISEL-NEXT: v_readlane_b32 s66, v40, 16 +; GISEL-NEXT: v_readlane_b32 s65, v40, 15 +; GISEL-NEXT: v_readlane_b32 s64, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: v_readlane_b32 s4, v40, 20 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -853,22 +853,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 @@ -882,22 +882,22 @@ define void 
@test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -915,22 +915,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; 
GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0 @@ -944,22 +944,22 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, 
v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -982,22 +982,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v41, s30, 0 -; GCN-NEXT: v_writelane_b32 v41, s31, 1 -; GCN-NEXT: v_writelane_b32 v41, s34, 2 -; GCN-NEXT: v_writelane_b32 v41, s35, 3 -; GCN-NEXT: v_writelane_b32 v41, s36, 4 -; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s48, 8 -; GCN-NEXT: v_writelane_b32 v41, s49, 9 -; GCN-NEXT: v_writelane_b32 v41, s50, 10 -; GCN-NEXT: v_writelane_b32 v41, s51, 11 -; GCN-NEXT: v_writelane_b32 v41, s52, 12 -; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s54, 14 -; GCN-NEXT: v_writelane_b32 v41, s55, 15 +; GCN-NEXT: v_writelane_b32 v41, s34, 0 +; 
GCN-NEXT: v_writelane_b32 v41, s35, 1 +; GCN-NEXT: v_writelane_b32 v41, s36, 2 +; GCN-NEXT: v_writelane_b32 v41, s37, 3 +; GCN-NEXT: v_writelane_b32 v41, s38, 4 +; GCN-NEXT: v_writelane_b32 v41, s39, 5 +; GCN-NEXT: v_writelane_b32 v41, s48, 6 +; GCN-NEXT: v_writelane_b32 v41, s49, 7 +; GCN-NEXT: v_writelane_b32 v41, s50, 8 +; GCN-NEXT: v_writelane_b32 v41, s51, 9 +; GCN-NEXT: v_writelane_b32 v41, s52, 10 +; GCN-NEXT: v_writelane_b32 v41, s53, 11 +; GCN-NEXT: v_writelane_b32 v41, s54, 12 +; GCN-NEXT: v_writelane_b32 v41, s55, 13 +; GCN-NEXT: v_writelane_b32 v41, s30, 14 +; GCN-NEXT: v_writelane_b32 v41, s31, 15 ; GCN-NEXT: v_mov_b32_e32 v40, v0 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1013,22 +1013,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v40 -; GCN-NEXT: v_readlane_b32 s55, v41, 15 -; GCN-NEXT: v_readlane_b32 s54, v41, 14 -; GCN-NEXT: v_readlane_b32 s53, v41, 13 -; GCN-NEXT: v_readlane_b32 s52, v41, 12 -; GCN-NEXT: v_readlane_b32 s51, v41, 11 -; GCN-NEXT: v_readlane_b32 s50, v41, 10 -; GCN-NEXT: v_readlane_b32 s49, v41, 9 -; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 -; GCN-NEXT: v_readlane_b32 s37, v41, 5 -; GCN-NEXT: v_readlane_b32 s36, v41, 4 -; GCN-NEXT: v_readlane_b32 s35, v41, 3 -; GCN-NEXT: v_readlane_b32 s34, v41, 2 -; GCN-NEXT: v_readlane_b32 s31, v41, 1 -; GCN-NEXT: v_readlane_b32 s30, v41, 0 +; GCN-NEXT: v_readlane_b32 s30, v41, 14 +; GCN-NEXT: v_readlane_b32 s31, v41, 15 +; GCN-NEXT: v_readlane_b32 s55, v41, 13 +; GCN-NEXT: v_readlane_b32 s54, v41, 12 +; GCN-NEXT: v_readlane_b32 s53, v41, 11 +; GCN-NEXT: v_readlane_b32 s52, v41, 10 +; GCN-NEXT: v_readlane_b32 s51, v41, 9 +; GCN-NEXT: v_readlane_b32 s50, v41, 8 +; GCN-NEXT: v_readlane_b32 s49, v41, 7 +; GCN-NEXT: v_readlane_b32 s48, v41, 6 +; GCN-NEXT: 
v_readlane_b32 s39, v41, 5 +; GCN-NEXT: v_readlane_b32 s38, v41, 4 +; GCN-NEXT: v_readlane_b32 s37, v41, 3 +; GCN-NEXT: v_readlane_b32 s36, v41, 2 +; GCN-NEXT: v_readlane_b32 s35, v41, 1 +; GCN-NEXT: v_readlane_b32 s34, v41, 0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1048,22 +1048,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v41, s30, 0 -; GISEL-NEXT: v_writelane_b32 v41, s31, 1 -; GISEL-NEXT: v_writelane_b32 v41, s34, 2 -; GISEL-NEXT: v_writelane_b32 v41, s35, 3 -; GISEL-NEXT: v_writelane_b32 v41, s36, 4 -; GISEL-NEXT: v_writelane_b32 v41, s37, 5 -; GISEL-NEXT: v_writelane_b32 v41, s38, 6 -; GISEL-NEXT: v_writelane_b32 v41, s39, 7 -; GISEL-NEXT: v_writelane_b32 v41, s48, 8 -; GISEL-NEXT: v_writelane_b32 v41, s49, 9 -; GISEL-NEXT: v_writelane_b32 v41, s50, 10 -; GISEL-NEXT: v_writelane_b32 v41, s51, 11 -; GISEL-NEXT: v_writelane_b32 v41, s52, 12 -; GISEL-NEXT: v_writelane_b32 v41, s53, 13 -; GISEL-NEXT: v_writelane_b32 v41, s54, 14 -; GISEL-NEXT: v_writelane_b32 v41, s55, 15 +; GISEL-NEXT: v_writelane_b32 v41, s34, 0 +; GISEL-NEXT: v_writelane_b32 v41, s35, 1 +; GISEL-NEXT: v_writelane_b32 v41, s36, 2 +; GISEL-NEXT: v_writelane_b32 v41, s37, 3 +; GISEL-NEXT: v_writelane_b32 v41, s38, 4 +; GISEL-NEXT: v_writelane_b32 v41, s39, 5 +; GISEL-NEXT: v_writelane_b32 v41, s48, 6 +; GISEL-NEXT: v_writelane_b32 v41, s49, 7 +; GISEL-NEXT: v_writelane_b32 v41, s50, 8 +; GISEL-NEXT: v_writelane_b32 v41, s51, 9 +; GISEL-NEXT: v_writelane_b32 v41, s52, 10 +; GISEL-NEXT: v_writelane_b32 v41, s53, 11 +; GISEL-NEXT: v_writelane_b32 v41, s54, 12 +; GISEL-NEXT: v_writelane_b32 v41, s55, 13 +; GISEL-NEXT: v_writelane_b32 v41, s30, 14 +; GISEL-NEXT: v_writelane_b32 
v41, s31, 15 ; GISEL-NEXT: v_mov_b32_e32 v40, v0 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1079,22 +1079,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v40 -; GISEL-NEXT: v_readlane_b32 s55, v41, 15 -; GISEL-NEXT: v_readlane_b32 s54, v41, 14 -; GISEL-NEXT: v_readlane_b32 s53, v41, 13 -; GISEL-NEXT: v_readlane_b32 s52, v41, 12 -; GISEL-NEXT: v_readlane_b32 s51, v41, 11 -; GISEL-NEXT: v_readlane_b32 s50, v41, 10 -; GISEL-NEXT: v_readlane_b32 s49, v41, 9 -; GISEL-NEXT: v_readlane_b32 s48, v41, 8 -; GISEL-NEXT: v_readlane_b32 s39, v41, 7 -; GISEL-NEXT: v_readlane_b32 s38, v41, 6 -; GISEL-NEXT: v_readlane_b32 s37, v41, 5 -; GISEL-NEXT: v_readlane_b32 s36, v41, 4 -; GISEL-NEXT: v_readlane_b32 s35, v41, 3 -; GISEL-NEXT: v_readlane_b32 s34, v41, 2 -; GISEL-NEXT: v_readlane_b32 s31, v41, 1 -; GISEL-NEXT: v_readlane_b32 s30, v41, 0 +; GISEL-NEXT: v_readlane_b32 s30, v41, 14 +; GISEL-NEXT: v_readlane_b32 s31, v41, 15 +; GISEL-NEXT: v_readlane_b32 s55, v41, 13 +; GISEL-NEXT: v_readlane_b32 s54, v41, 12 +; GISEL-NEXT: v_readlane_b32 s53, v41, 11 +; GISEL-NEXT: v_readlane_b32 s52, v41, 10 +; GISEL-NEXT: v_readlane_b32 s51, v41, 9 +; GISEL-NEXT: v_readlane_b32 s50, v41, 8 +; GISEL-NEXT: v_readlane_b32 s49, v41, 7 +; GISEL-NEXT: v_readlane_b32 s48, v41, 6 +; GISEL-NEXT: v_readlane_b32 s39, v41, 5 +; GISEL-NEXT: v_readlane_b32 s38, v41, 4 +; GISEL-NEXT: v_readlane_b32 s37, v41, 3 +; GISEL-NEXT: v_readlane_b32 s36, v41, 2 +; GISEL-NEXT: v_readlane_b32 s35, v41, 1 +; GISEL-NEXT: v_readlane_b32 s34, v41, 0 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1121,22 +1121,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], 
s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 @@ -1152,22 +1152,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: 
v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1185,22 +1185,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; 
GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1 @@ -1216,22 +1216,22 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, 
v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1254,22 +1254,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s48, 8 -; GCN-NEXT: v_writelane_b32 v40, s49, 9 -; GCN-NEXT: v_writelane_b32 v40, s50, 10 -; GCN-NEXT: v_writelane_b32 v40, s51, 11 -; GCN-NEXT: v_writelane_b32 v40, s52, 12 -; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s54, 14 -; GCN-NEXT: v_writelane_b32 v40, s55, 15 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s48, 6 +; GCN-NEXT: v_writelane_b32 v40, s49, 7 +; GCN-NEXT: v_writelane_b32 v40, s50, 8 +; GCN-NEXT: v_writelane_b32 v40, s51, 9 +; GCN-NEXT: v_writelane_b32 v40, 
s52, 10 +; GCN-NEXT: v_writelane_b32 v40, s53, 11 +; GCN-NEXT: v_writelane_b32 v40, s54, 12 +; GCN-NEXT: v_writelane_b32 v40, s55, 13 +; GCN-NEXT: v_writelane_b32 v40, s30, 14 +; GCN-NEXT: v_writelane_b32 v40, s31, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s6, v0 @@ -1282,22 +1282,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s55, v40, 15 -; GCN-NEXT: v_readlane_b32 s54, v40, 14 -; GCN-NEXT: v_readlane_b32 s53, v40, 13 -; GCN-NEXT: v_readlane_b32 s52, v40, 12 -; GCN-NEXT: v_readlane_b32 s51, v40, 11 -; GCN-NEXT: v_readlane_b32 s50, v40, 10 -; GCN-NEXT: v_readlane_b32 s49, v40, 9 -; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s30, v40, 14 +; GCN-NEXT: v_readlane_b32 s31, v40, 15 +; GCN-NEXT: v_readlane_b32 s55, v40, 13 +; GCN-NEXT: v_readlane_b32 s54, v40, 12 +; GCN-NEXT: v_readlane_b32 s53, v40, 11 +; GCN-NEXT: v_readlane_b32 s52, v40, 10 +; GCN-NEXT: v_readlane_b32 s51, v40, 9 +; GCN-NEXT: v_readlane_b32 s50, v40, 8 +; GCN-NEXT: v_readlane_b32 s49, v40, 7 +; GCN-NEXT: v_readlane_b32 s48, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload @@ -1315,22 +1315,22 @@ define void 
@test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s48, 8 -; GISEL-NEXT: v_writelane_b32 v40, s49, 9 -; GISEL-NEXT: v_writelane_b32 v40, s50, 10 -; GISEL-NEXT: v_writelane_b32 v40, s51, 11 -; GISEL-NEXT: v_writelane_b32 v40, s52, 12 -; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s54, 14 -; GISEL-NEXT: v_writelane_b32 v40, s55, 15 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s48, 6 +; GISEL-NEXT: v_writelane_b32 v40, s49, 7 +; GISEL-NEXT: v_writelane_b32 v40, s50, 8 +; GISEL-NEXT: v_writelane_b32 v40, s51, 9 +; GISEL-NEXT: v_writelane_b32 v40, s52, 10 +; GISEL-NEXT: v_writelane_b32 v40, s53, 11 +; GISEL-NEXT: v_writelane_b32 v40, s54, 12 +; GISEL-NEXT: v_writelane_b32 v40, s55, 13 +; GISEL-NEXT: v_writelane_b32 v40, s30, 14 +; GISEL-NEXT: v_writelane_b32 v40, s31, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0 @@ -1343,22 +1343,22 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s55, v40, 15 -; GISEL-NEXT: v_readlane_b32 s54, v40, 14 -; 
GISEL-NEXT: v_readlane_b32 s53, v40, 13 -; GISEL-NEXT: v_readlane_b32 s52, v40, 12 -; GISEL-NEXT: v_readlane_b32 s51, v40, 11 -; GISEL-NEXT: v_readlane_b32 s50, v40, 10 -; GISEL-NEXT: v_readlane_b32 s49, v40, 9 -; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s30, v40, 14 +; GISEL-NEXT: v_readlane_b32 s31, v40, 15 +; GISEL-NEXT: v_readlane_b32 s55, v40, 13 +; GISEL-NEXT: v_readlane_b32 s54, v40, 12 +; GISEL-NEXT: v_readlane_b32 s53, v40, 11 +; GISEL-NEXT: v_readlane_b32 s52, v40, 10 +; GISEL-NEXT: v_readlane_b32 s51, v40, 9 +; GISEL-NEXT: v_readlane_b32 s50, v40, 8 +; GISEL-NEXT: v_readlane_b32 s49, v40, 7 +; GISEL-NEXT: v_readlane_b32 s48, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_mov_b32 s32, s33 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir index 4d8fb8db624f8..2872cfd212273 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir @@ -19,6 +19,8 @@ body: | ; CHECK-LABEL: name: av_mov_b32_split ; CHECK: liveins: $agpr3, $agpr4, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: 
renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec ; CHECK-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: renamable $agpr2 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec @@ -68,6 +70,8 @@ body: | ; CHECK-LABEL: name: v_mov_b32_split ; CHECK: liveins: $agpr3, $agpr4, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec ; CHECK-NEXT: renamable $vgpr2 = V_MOV_B32_e32 2, implicit $exec @@ -120,6 +124,8 @@ body: | ; CHECK-LABEL: name: av_mov_b64_split ; CHECK: liveins: $agpr6, $agpr7, $agpr8, $agpr9, $vgpr0, $sgpr4_sgpr5 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr0_agpr1 ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec, implicit-def $agpr0_agpr1 ; CHECK-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec, implicit-def $agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index c3f391786f878..3be6682bc4ffa 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -25,18 +25,17 @@ define void @f0() { ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v4, s30, 0 ; GFX11-NEXT: s_add_i32 s32, s32, 16 +; GFX11-NEXT: v_writelane_b32 v4, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, f1@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, f1@gotpcrel32@hi+12 -; GFX11-NEXT: 
v_writelane_b32 v4, s30, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v4, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s31, v4, 1 ; GFX11-NEXT: v_readlane_b32 s30, v4, 0 +; GFX11-NEXT: v_readlane_b32 s31, v4, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v4, off, s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll index 58cd2f5bc11af..e6d93d857d5a0 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll @@ -7,16 +7,230 @@ define fastcc i32 @foo() { ; CHECK-LABEL: name: foo ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + 
; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr177 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr230 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr241 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr91 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32 ; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17 - ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr40, 2, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr127, 0, 32, $vgpr127, 1, 32 ; CHECK-NEXT: BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $scc, implicit-def $sgpr17 { ; CHECK-NEXT: $sgpr16_sgpr17 = S_GETPC_B64 ; CHECK-NEXT: $sgpr16 = S_ADD_U32 internal $sgpr16, target-flags(amdgpu-gotprel32-lo) @bar + 4, implicit-def $scc @@ -26,8 +240,6 @@ define fastcc i32 @foo() { ; CHECK-NEXT: BUFFER_GL1_INV implicit $exec ; CHECK-NEXT: 
BUFFER_GL0_INV implicit $exec ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40 - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40 ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo @@ -39,13 +251,14 @@ define fastcc i32 @foo() { ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.DummyReturnBlock: + ; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0, implicit-def $sgpr30_sgpr31 ; CHECK-NEXT: $sgpr31 = V_READLANE_B32 $vgpr40, 1 - ; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0 ; CHECK-NEXT: $sgpr32 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr40, 2 ; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 killed $sgpr4 ; CHECK-NEXT: S_WAITCNT 16240 ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit undef $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir 
b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir index 786ce40203836..e44736584767b 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir @@ -14,6 +14,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0, $vgpr2 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir index 86b6c5982b4cb..55f21d95bcac4 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir @@ -19,6 +19,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -67,6 +69,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -115,6 +119,8 @@ body: | ; CHECK-NEXT: successors: 
%bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -164,6 +170,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -215,6 +223,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -270,8 +280,217 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr35 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr36 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr37 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr38 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr39 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; CHECK-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr144 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr145 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr146 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr147 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr148 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr149 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr150 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr151 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr160 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr161 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr162 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr163 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr164 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr165 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr166 
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr167 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr176 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr177 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr178 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr179 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr180 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr181 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr182 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr183 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr193 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr194 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr195 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr196 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr197 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr198 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr199 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr208 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr209 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr210 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr211 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr212 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr213 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr214 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr215 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr224 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr225 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr226 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr227 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr228 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr229 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr230 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr231 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40 ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40 diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir index 7a913cf50ea2b..f96c3c56896c0 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir +++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir @@ -31,6 +31,8 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, 
$sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $sgpr33 = S_MOV_B32 0 ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll index d1ba892d7f7e1..2f4d5ee3cbce5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll @@ -984,10 +984,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 -; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 -; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 @@ -1004,6 +1000,10 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 +; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 +; 
CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 @@ -1070,10 +1070,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 -; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 @@ -1089,6 +1085,10 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 @@ -1429,10 +1429,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 -; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 -; 
CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 @@ -1449,6 +1445,10 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 +; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 +; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 +; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 @@ -1515,10 +1515,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-GISEL-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 -; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 -; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 @@ -1534,6 +1530,10 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-GISEL-NEXT: buffer_load_dword v0, off, s[0:3], s32 +; CHECK-GISEL-NEXT: buffer_load_dword v1, off, s[0:3], 
s32 offset:4 +; CHECK-GISEL-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 4d23fb116cd03..294d8bbbeba63 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -22,6 +22,8 @@ entry: ; GCN-LABEL: {{^}}only_undef_dbg_value: ; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef +; NOOPT-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; NOOPT-NEXT: .cfi_undefined 16 ; NOOPT-NEXT: s_endpgm ; OPT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index f971080e02c5b..72c4397754ce6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -2375,6 +2375,12 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 @@ -2402,12 +2408,6 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: 
scratch_load_dword v35, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) ; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index dfd67873c3b86..526988d1f36ac 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -2375,6 +2375,12 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 ; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 ; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 @@ -2402,12 +2408,6 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104 ; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse -; 
GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(25) ; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33] ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index 5f0ca7bc42ae0..db80f5479d36b 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -109,15 +109,15 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s5, s33 ; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0 +; MUBUF-NEXT: s_mov_b32 s6, s34 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000 +; MUBUF-NEXT: s_mov_b32 s34, s32 ; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33 ; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3 -; MUBUF-NEXT: s_mov_b32 s6, s34 ; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3 ; MUBUF-NEXT: v_mov_b32_e32 v3, 0 ; MUBUF-NEXT: v_mov_b32_e32 v4, 0x2000 ; MUBUF-NEXT: s_mov_b32 s4, 0 -; MUBUF-NEXT: s_mov_b32 s34, s32 ; MUBUF-NEXT: s_add_i32 s32, s32, 0x200000 ; MUBUF-NEXT: buffer_store_dword v3, v4, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) @@ -145,11 +145,11 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_mov_b32 s32, s34 ; MUBUF-NEXT: s_mov_b32 s34, s6 -; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6 ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc ; MUBUF-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; MUBUF-NEXT: s_waitcnt vmcnt(0) +; MUBUF-NEXT: s_mov_b32 s33, s5 ; MUBUF-NEXT: s_setpc_b64 
s[30:31] ; ; FLATSCR-LABEL: func_local_stack_offset_uses_sp: @@ -157,8 +157,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s2, s33 ; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff -; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_mov_b32 s3, s34 +; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000 ; FLATSCR-NEXT: s_mov_b32 s34, s32 ; FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 @@ -186,11 +186,11 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s32, s34 ; FLATSCR-NEXT: s_mov_b32 s34, s3 -; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc ; FLATSCR-NEXT: global_store_dwordx2 v[0:1], v[2:3], off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_mov_b32 s33, s2 ; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: %pin.low = alloca i32, align 8192, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index 4b5a7c207055a..52671f5d3deb4 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -17,8 +17,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART @@ -46,8 +46,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; 
GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART @@ -74,8 +74,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 @@ -108,9 +109,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -139,9 +139,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -168,6 +168,7 @@ define void 
@scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -175,7 +176,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -196,13 +196,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -397,10 +397,10 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -433,9 +433,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; 
GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -467,9 +467,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -501,8 +501,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 -; GFX11-NEXT: s_add_i32 s0, s33, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s0, s33, 64 +; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 @@ -511,7 +512,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc @@ -539,8 +540,8 @@ define void 
@scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s33 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -574,16 +575,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: s_movk_i32 s55, 0x4040 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 -; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_readfirstlane_b32 s55, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART @@ -608,6 +609,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -615,8 +618,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, 
v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -642,13 +643,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_add_i32 s0, s33, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 @@ -681,8 +682,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 @@ -705,8 +706,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 @@ -728,13 +729,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, 
s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s55, scc @@ -804,9 +804,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 -; GFX900-NEXT: v_writelane_b32 v0, s55, 0 ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -827,11 +827,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc @@ -989,8 +989,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; 
GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 @@ -1018,9 +1018,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 @@ -1047,9 +1047,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 @@ -1076,8 +1076,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 -; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 @@ -1109,13 +1109,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: 
s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 @@ -1136,11 +1137,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX8-NEXT: s_mov_b32 s55, 64 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 -; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX8-NEXT: v_readfirstlane_b32 s55, v1 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART @@ -1165,10 +1166,10 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 -; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART @@ -1194,11 +1195,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: 
v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s55, 0 ; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55, scc @@ -1228,8 +1229,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: s_add_i32 s55, s55, 64 @@ -1255,8 +1256,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: s_add_i32 s55, s55, 64 @@ -1281,8 +1282,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_add_i32 s1, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_mov_b32 s55, s1 @@ -1311,14 +1312,15 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 
0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_mov_b32 s32, s33 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 +; GFX12-NEXT: s_mov_b32 s32, s33 ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -1390,8 +1392,8 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 -; GFX942-NEXT: s_add_i32 s1, s33, 64 ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_add_i32 s1, s33, 64 ; GFX942-NEXT: s_mov_b32 s55, s1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s55 @@ -1529,8 +1531,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 +; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: s_add_i32 s55, s4, 0x442c ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 @@ -1556,8 +1558,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: s_add_i32 s55, s4, 0x442c ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: 
v_add_u32_e32 v0, 64, v0 @@ -1677,8 +1679,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: s_add_i32 s1, s32, 64 ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 +; GFX11-NEXT: s_add_i32 s1, s32, 64 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-NEXT: s_add_i32 s55, s32, s0 @@ -1796,8 +1798,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[2:3] -; GFX942-NEXT: s_lshl_b32 s0, s0, 2 ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_lshl_b32 s0, s0, 2 ; GFX942-NEXT: s_add_i32 s55, s32, s0 ; GFX942-NEXT: s_addk_i32 s55, 0x4040 ; GFX942-NEXT: s_add_i32 s0, s32, 64 diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index 17581bcb61e99..b0fee0fe0aa19 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -37,26 +37,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v23, s30, 0 -; GFX7-NEXT: v_writelane_b32 v23, s31, 1 -; GFX7-NEXT: v_writelane_b32 v23, s33, 2 -; GFX7-NEXT: v_writelane_b32 v23, s34, 3 -; GFX7-NEXT: v_writelane_b32 v23, s35, 4 -; GFX7-NEXT: v_writelane_b32 v23, s36, 5 -; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: v_writelane_b32 v23, s48, 9 -; GFX7-NEXT: v_writelane_b32 v23, s49, 10 -; GFX7-NEXT: 
v_writelane_b32 v23, s50, 11 -; GFX7-NEXT: v_writelane_b32 v23, s51, 12 -; GFX7-NEXT: v_writelane_b32 v23, s52, 13 -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 +; GFX7-NEXT: v_writelane_b32 v23, s33, 0 +; GFX7-NEXT: v_writelane_b32 v23, s34, 1 +; GFX7-NEXT: v_writelane_b32 v23, s35, 2 +; GFX7-NEXT: v_writelane_b32 v23, s36, 3 +; GFX7-NEXT: v_writelane_b32 v23, s37, 4 +; GFX7-NEXT: v_writelane_b32 v23, s38, 5 +; GFX7-NEXT: v_writelane_b32 v23, s39, 6 +; GFX7-NEXT: v_writelane_b32 v23, s48, 7 +; GFX7-NEXT: v_writelane_b32 v23, s49, 8 +; GFX7-NEXT: v_writelane_b32 v23, s50, 9 +; GFX7-NEXT: v_writelane_b32 v23, s51, 10 +; GFX7-NEXT: v_writelane_b32 v23, s52, 11 +; GFX7-NEXT: v_writelane_b32 v23, s53, 12 +; GFX7-NEXT: v_writelane_b32 v23, s54, 13 +; GFX7-NEXT: v_writelane_b32 v23, s55, 14 +; GFX7-NEXT: v_writelane_b32 v23, s30, 15 +; GFX7-NEXT: v_writelane_b32 v23, s31, 16 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -73,23 +73,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v23, 16 -; GFX7-NEXT: v_readlane_b32 s54, v23, 15 -; GFX7-NEXT: v_readlane_b32 s53, v23, 14 -; GFX7-NEXT: v_readlane_b32 s52, v23, 13 -; GFX7-NEXT: v_readlane_b32 s51, v23, 12 -; GFX7-NEXT: v_readlane_b32 s50, v23, 11 -; GFX7-NEXT: v_readlane_b32 s49, v23, 10 -; GFX7-NEXT: v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 -; GFX7-NEXT: v_readlane_b32 s37, v23, 6 -; GFX7-NEXT: v_readlane_b32 s36, v23, 5 -; GFX7-NEXT: v_readlane_b32 s35, v23, 4 -; GFX7-NEXT: v_readlane_b32 s34, v23, 3 
-; GFX7-NEXT: v_readlane_b32 s33, v23, 2 -; GFX7-NEXT: v_readlane_b32 s31, v23, 1 -; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: v_readlane_b32 s30, v23, 15 +; GFX7-NEXT: v_readlane_b32 s31, v23, 16 +; GFX7-NEXT: v_readlane_b32 s55, v23, 14 +; GFX7-NEXT: v_readlane_b32 s54, v23, 13 +; GFX7-NEXT: v_readlane_b32 s53, v23, 12 +; GFX7-NEXT: v_readlane_b32 s52, v23, 11 +; GFX7-NEXT: v_readlane_b32 s51, v23, 10 +; GFX7-NEXT: v_readlane_b32 s50, v23, 9 +; GFX7-NEXT: v_readlane_b32 s49, v23, 8 +; GFX7-NEXT: v_readlane_b32 s48, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 6 +; GFX7-NEXT: v_readlane_b32 s38, v23, 5 +; GFX7-NEXT: v_readlane_b32 s37, v23, 4 +; GFX7-NEXT: v_readlane_b32 s36, v23, 3 +; GFX7-NEXT: v_readlane_b32 s35, v23, 2 +; GFX7-NEXT: v_readlane_b32 s34, v23, 1 +; GFX7-NEXT: v_readlane_b32 s33, v23, 0 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -104,26 +104,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v23, s30, 0 -; GFX8-NEXT: v_writelane_b32 v23, s31, 1 -; GFX8-NEXT: v_writelane_b32 v23, s33, 2 -; GFX8-NEXT: v_writelane_b32 v23, s34, 3 -; GFX8-NEXT: v_writelane_b32 v23, s35, 4 -; GFX8-NEXT: v_writelane_b32 v23, s36, 5 -; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s38, 7 -; GFX8-NEXT: v_writelane_b32 v23, s39, 8 -; GFX8-NEXT: v_writelane_b32 v23, s48, 9 -; GFX8-NEXT: v_writelane_b32 v23, s49, 10 -; GFX8-NEXT: v_writelane_b32 v23, s50, 11 -; GFX8-NEXT: v_writelane_b32 v23, s51, 12 -; GFX8-NEXT: v_writelane_b32 v23, s52, 13 -; GFX8-NEXT: v_writelane_b32 v23, s53, 14 +; GFX8-NEXT: v_writelane_b32 v23, s33, 0 +; GFX8-NEXT: v_writelane_b32 v23, s34, 1 +; GFX8-NEXT: v_writelane_b32 v23, s35, 
2 +; GFX8-NEXT: v_writelane_b32 v23, s36, 3 +; GFX8-NEXT: v_writelane_b32 v23, s37, 4 +; GFX8-NEXT: v_writelane_b32 v23, s38, 5 +; GFX8-NEXT: v_writelane_b32 v23, s39, 6 +; GFX8-NEXT: v_writelane_b32 v23, s48, 7 +; GFX8-NEXT: v_writelane_b32 v23, s49, 8 +; GFX8-NEXT: v_writelane_b32 v23, s50, 9 +; GFX8-NEXT: v_writelane_b32 v23, s51, 10 +; GFX8-NEXT: v_writelane_b32 v23, s52, 11 +; GFX8-NEXT: v_writelane_b32 v23, s53, 12 +; GFX8-NEXT: v_writelane_b32 v23, s54, 13 +; GFX8-NEXT: v_writelane_b32 v23, s55, 14 +; GFX8-NEXT: v_writelane_b32 v23, s30, 15 +; GFX8-NEXT: v_writelane_b32 v23, s31, 16 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -141,23 +141,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v23, 16 -; GFX8-NEXT: v_readlane_b32 s54, v23, 15 -; GFX8-NEXT: v_readlane_b32 s53, v23, 14 -; GFX8-NEXT: v_readlane_b32 s52, v23, 13 -; GFX8-NEXT: v_readlane_b32 s51, v23, 12 -; GFX8-NEXT: v_readlane_b32 s50, v23, 11 -; GFX8-NEXT: v_readlane_b32 s49, v23, 10 -; GFX8-NEXT: v_readlane_b32 s48, v23, 9 -; GFX8-NEXT: v_readlane_b32 s39, v23, 8 -; GFX8-NEXT: v_readlane_b32 s38, v23, 7 -; GFX8-NEXT: v_readlane_b32 s37, v23, 6 -; GFX8-NEXT: v_readlane_b32 s36, v23, 5 -; GFX8-NEXT: v_readlane_b32 s35, v23, 4 -; GFX8-NEXT: v_readlane_b32 s34, v23, 3 -; GFX8-NEXT: v_readlane_b32 s33, v23, 2 -; GFX8-NEXT: v_readlane_b32 s31, v23, 1 -; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: v_readlane_b32 s30, v23, 15 +; GFX8-NEXT: v_readlane_b32 s31, v23, 16 +; GFX8-NEXT: v_readlane_b32 s55, v23, 14 +; GFX8-NEXT: v_readlane_b32 
s54, v23, 13 +; GFX8-NEXT: v_readlane_b32 s53, v23, 12 +; GFX8-NEXT: v_readlane_b32 s52, v23, 11 +; GFX8-NEXT: v_readlane_b32 s51, v23, 10 +; GFX8-NEXT: v_readlane_b32 s50, v23, 9 +; GFX8-NEXT: v_readlane_b32 s49, v23, 8 +; GFX8-NEXT: v_readlane_b32 s48, v23, 7 +; GFX8-NEXT: v_readlane_b32 s39, v23, 6 +; GFX8-NEXT: v_readlane_b32 s38, v23, 5 +; GFX8-NEXT: v_readlane_b32 s37, v23, 4 +; GFX8-NEXT: v_readlane_b32 s36, v23, 3 +; GFX8-NEXT: v_readlane_b32 s35, v23, 2 +; GFX8-NEXT: v_readlane_b32 s34, v23, 1 +; GFX8-NEXT: v_readlane_b32 s33, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -172,26 +172,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v23, s30, 0 -; GFX900-NEXT: v_writelane_b32 v23, s31, 1 -; GFX900-NEXT: v_writelane_b32 v23, s33, 2 -; GFX900-NEXT: v_writelane_b32 v23, s34, 3 -; GFX900-NEXT: v_writelane_b32 v23, s35, 4 -; GFX900-NEXT: v_writelane_b32 v23, s36, 5 -; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s38, 7 -; GFX900-NEXT: v_writelane_b32 v23, s39, 8 -; GFX900-NEXT: v_writelane_b32 v23, s48, 9 -; GFX900-NEXT: v_writelane_b32 v23, s49, 10 -; GFX900-NEXT: v_writelane_b32 v23, s50, 11 -; GFX900-NEXT: v_writelane_b32 v23, s51, 12 -; GFX900-NEXT: v_writelane_b32 v23, s52, 13 -; GFX900-NEXT: v_writelane_b32 v23, s53, 14 +; GFX900-NEXT: v_writelane_b32 v23, s33, 0 +; GFX900-NEXT: v_writelane_b32 v23, s34, 1 +; GFX900-NEXT: v_writelane_b32 v23, s35, 2 +; GFX900-NEXT: v_writelane_b32 v23, s36, 3 +; GFX900-NEXT: v_writelane_b32 v23, s37, 4 +; GFX900-NEXT: v_writelane_b32 v23, s38, 5 +; GFX900-NEXT: v_writelane_b32 v23, s39, 6 +; GFX900-NEXT: v_writelane_b32 v23, s48, 7 +; 
GFX900-NEXT: v_writelane_b32 v23, s49, 8 +; GFX900-NEXT: v_writelane_b32 v23, s50, 9 +; GFX900-NEXT: v_writelane_b32 v23, s51, 10 +; GFX900-NEXT: v_writelane_b32 v23, s52, 11 +; GFX900-NEXT: v_writelane_b32 v23, s53, 12 +; GFX900-NEXT: v_writelane_b32 v23, s54, 13 +; GFX900-NEXT: v_writelane_b32 v23, s55, 14 +; GFX900-NEXT: v_writelane_b32 v23, s30, 15 +; GFX900-NEXT: v_writelane_b32 v23, s31, 16 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -208,23 +208,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v23, 16 -; GFX900-NEXT: v_readlane_b32 s54, v23, 15 -; GFX900-NEXT: v_readlane_b32 s53, v23, 14 -; GFX900-NEXT: v_readlane_b32 s52, v23, 13 -; GFX900-NEXT: v_readlane_b32 s51, v23, 12 -; GFX900-NEXT: v_readlane_b32 s50, v23, 11 -; GFX900-NEXT: v_readlane_b32 s49, v23, 10 -; GFX900-NEXT: v_readlane_b32 s48, v23, 9 -; GFX900-NEXT: v_readlane_b32 s39, v23, 8 -; GFX900-NEXT: v_readlane_b32 s38, v23, 7 -; GFX900-NEXT: v_readlane_b32 s37, v23, 6 -; GFX900-NEXT: v_readlane_b32 s36, v23, 5 -; GFX900-NEXT: v_readlane_b32 s35, v23, 4 -; GFX900-NEXT: v_readlane_b32 s34, v23, 3 -; GFX900-NEXT: v_readlane_b32 s33, v23, 2 -; GFX900-NEXT: v_readlane_b32 s31, v23, 1 -; GFX900-NEXT: v_readlane_b32 s30, v23, 0 +; GFX900-NEXT: v_readlane_b32 s30, v23, 15 +; GFX900-NEXT: v_readlane_b32 s31, v23, 16 +; GFX900-NEXT: v_readlane_b32 s55, v23, 14 +; GFX900-NEXT: v_readlane_b32 s54, v23, 13 +; GFX900-NEXT: v_readlane_b32 s53, v23, 12 +; GFX900-NEXT: v_readlane_b32 s52, v23, 11 +; GFX900-NEXT: v_readlane_b32 s51, 
v23, 10 +; GFX900-NEXT: v_readlane_b32 s50, v23, 9 +; GFX900-NEXT: v_readlane_b32 s49, v23, 8 +; GFX900-NEXT: v_readlane_b32 s48, v23, 7 +; GFX900-NEXT: v_readlane_b32 s39, v23, 6 +; GFX900-NEXT: v_readlane_b32 s38, v23, 5 +; GFX900-NEXT: v_readlane_b32 s37, v23, 4 +; GFX900-NEXT: v_readlane_b32 s36, v23, 3 +; GFX900-NEXT: v_readlane_b32 s35, v23, 2 +; GFX900-NEXT: v_readlane_b32 s34, v23, 1 +; GFX900-NEXT: v_readlane_b32 s33, v23, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -239,26 +239,27 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v23, s30, 0 -; GFX942-NEXT: v_writelane_b32 v23, s31, 1 -; GFX942-NEXT: v_writelane_b32 v23, s33, 2 -; GFX942-NEXT: v_writelane_b32 v23, s34, 3 -; GFX942-NEXT: v_writelane_b32 v23, s35, 4 -; GFX942-NEXT: v_writelane_b32 v23, s36, 5 -; GFX942-NEXT: v_writelane_b32 v23, s37, 6 -; GFX942-NEXT: v_writelane_b32 v23, s38, 7 -; GFX942-NEXT: v_writelane_b32 v23, s39, 8 -; GFX942-NEXT: v_writelane_b32 v23, s48, 9 -; GFX942-NEXT: v_writelane_b32 v23, s49, 10 -; GFX942-NEXT: v_writelane_b32 v23, s50, 11 -; GFX942-NEXT: v_writelane_b32 v23, s51, 12 -; GFX942-NEXT: v_writelane_b32 v23, s52, 13 -; GFX942-NEXT: v_writelane_b32 v23, s53, 14 +; GFX942-NEXT: v_writelane_b32 v23, s33, 0 +; GFX942-NEXT: v_writelane_b32 v23, s34, 1 +; GFX942-NEXT: v_writelane_b32 v23, s35, 2 +; GFX942-NEXT: v_writelane_b32 v23, s36, 3 +; GFX942-NEXT: v_writelane_b32 v23, s37, 4 +; GFX942-NEXT: v_writelane_b32 v23, s38, 5 +; GFX942-NEXT: v_writelane_b32 v23, s39, 6 +; GFX942-NEXT: v_writelane_b32 v23, s48, 7 +; GFX942-NEXT: v_writelane_b32 v23, s49, 8 +; GFX942-NEXT: v_writelane_b32 v23, s50, 9 +; GFX942-NEXT: v_writelane_b32 v23, 
s51, 10 +; GFX942-NEXT: v_writelane_b32 v23, s52, 11 +; GFX942-NEXT: v_writelane_b32 v23, s53, 12 +; GFX942-NEXT: v_writelane_b32 v23, s54, 13 +; GFX942-NEXT: v_writelane_b32 v23, s55, 14 +; GFX942-NEXT: v_writelane_b32 v23, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v23, s31, 16 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v23, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec -; GFX942-NEXT: v_writelane_b32 v23, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -273,23 +274,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v23, 16 -; GFX942-NEXT: v_readlane_b32 s54, v23, 15 -; GFX942-NEXT: v_readlane_b32 s53, v23, 14 -; GFX942-NEXT: v_readlane_b32 s52, v23, 13 -; GFX942-NEXT: v_readlane_b32 s51, v23, 12 -; GFX942-NEXT: v_readlane_b32 s50, v23, 11 -; GFX942-NEXT: v_readlane_b32 s49, v23, 10 -; GFX942-NEXT: v_readlane_b32 s48, v23, 9 -; GFX942-NEXT: v_readlane_b32 s39, v23, 8 -; GFX942-NEXT: v_readlane_b32 s38, v23, 7 -; GFX942-NEXT: v_readlane_b32 s37, v23, 6 -; GFX942-NEXT: v_readlane_b32 s36, v23, 5 -; GFX942-NEXT: v_readlane_b32 s35, v23, 4 -; GFX942-NEXT: v_readlane_b32 s34, v23, 3 -; GFX942-NEXT: v_readlane_b32 s33, v23, 2 -; GFX942-NEXT: v_readlane_b32 s31, v23, 1 -; GFX942-NEXT: v_readlane_b32 s30, v23, 0 +; GFX942-NEXT: v_readlane_b32 s30, v23, 15 +; GFX942-NEXT: v_readlane_b32 s31, v23, 16 +; GFX942-NEXT: v_readlane_b32 s55, v23, 14 +; GFX942-NEXT: v_readlane_b32 s54, v23, 13 +; GFX942-NEXT: v_readlane_b32 s53, v23, 12 +; GFX942-NEXT: v_readlane_b32 s52, v23, 11 +; GFX942-NEXT: v_readlane_b32 s51, v23, 10 +; GFX942-NEXT: v_readlane_b32 s50, v23, 9 +; GFX942-NEXT: v_readlane_b32 s49, v23, 8 +; 
GFX942-NEXT: v_readlane_b32 s48, v23, 7 +; GFX942-NEXT: v_readlane_b32 s39, v23, 6 +; GFX942-NEXT: v_readlane_b32 s38, v23, 5 +; GFX942-NEXT: v_readlane_b32 s37, v23, 4 +; GFX942-NEXT: v_readlane_b32 s36, v23, 3 +; GFX942-NEXT: v_readlane_b32 s35, v23, 2 +; GFX942-NEXT: v_readlane_b32 s34, v23, 1 +; GFX942-NEXT: v_readlane_b32 s33, v23, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 ; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload @@ -305,29 +306,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_1-NEXT: v_writelane_b32 v23, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v23, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 v23, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v23, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v23, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v23, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v23, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v23, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v23, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v23, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v23, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v23, s54, 13 +; GFX10_1-NEXT: v_writelane_b32 v23, s55, 14 +; GFX10_1-NEXT: v_writelane_b32 v23, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v23, s31, 16 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 -; GFX10_1-NEXT: 
v_writelane_b32 v23, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12 -; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -338,23 +339,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13 -; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v23, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v23, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v23, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v23, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v23, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v23, 11 +; GFX10_1-NEXT: 
v_readlane_b32 s51, v23, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v23, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v23, 6 +; GFX10_1-NEXT: v_readlane_b32 s38, v23, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v23, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v23, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v23, 2 +; GFX10_1-NEXT: v_readlane_b32 s34, v23, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v23, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload @@ -370,29 +371,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0 +; GFX10_3-NEXT: v_writelane_b32 v23, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v23, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v23, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v23, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v23, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v23, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v23, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v23, s48, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v23, s51, 10 +; GFX10_3-NEXT: v_writelane_b32 v23, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v23, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v23, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v23, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v23, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v23, s31, 16 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: 
v_writelane_b32 v23, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -403,23 +404,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15 -; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v23, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v23, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v23, 14 +; GFX10_3-NEXT: 
v_readlane_b32 s54, v23, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v23, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v23, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v23, 10 +; GFX10_3-NEXT: v_readlane_b32 s50, v23, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s48, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v23, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v23, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v23, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v23, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v23, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v23, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v23, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 ; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload @@ -434,59 +435,59 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v23, s30, 0 +; GFX11-NEXT: v_writelane_b32 v23, s33, 0 +; GFX11-NEXT: v_writelane_b32 v23, s34, 1 +; GFX11-NEXT: v_writelane_b32 v23, s35, 2 +; GFX11-NEXT: v_writelane_b32 v23, s36, 3 +; GFX11-NEXT: v_writelane_b32 v23, s37, 4 +; GFX11-NEXT: v_writelane_b32 v23, s38, 5 +; GFX11-NEXT: v_writelane_b32 v23, s39, 6 +; GFX11-NEXT: v_writelane_b32 v23, s48, 7 +; GFX11-NEXT: v_writelane_b32 v23, s49, 8 +; GFX11-NEXT: v_writelane_b32 v23, s50, 9 +; GFX11-NEXT: v_writelane_b32 v23, s51, 10 +; GFX11-NEXT: v_writelane_b32 v23, s52, 11 +; GFX11-NEXT: v_writelane_b32 v23, s53, 12 +; GFX11-NEXT: v_writelane_b32 v23, s54, 13 +; GFX11-NEXT: v_writelane_b32 v23, s55, 14 +; GFX11-NEXT: v_writelane_b32 v23, s30, 15 +; GFX11-NEXT: v_writelane_b32 v23, s31, 16 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 
v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v23, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v23, s33, 2 -; GFX11-NEXT: v_writelane_b32 v23, s34, 3 -; GFX11-NEXT: v_writelane_b32 v23, s35, 4 -; GFX11-NEXT: v_writelane_b32 v23, s36, 5 -; GFX11-NEXT: v_writelane_b32 v23, s37, 6 -; GFX11-NEXT: v_writelane_b32 v23, s38, 7 -; GFX11-NEXT: v_writelane_b32 v23, s39, 8 -; GFX11-NEXT: v_writelane_b32 v23, s48, 9 -; GFX11-NEXT: v_writelane_b32 v23, s49, 10 -; GFX11-NEXT: v_writelane_b32 v23, s50, 11 -; GFX11-NEXT: v_writelane_b32 v23, s51, 12 -; GFX11-NEXT: v_writelane_b32 v23, s52, 13 -; GFX11-NEXT: v_writelane_b32 v23, s53, 14 -; GFX11-NEXT: v_writelane_b32 v23, s54, 15 -; GFX11-NEXT: v_writelane_b32 v23, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040 ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s59, 0 ; GFX11-NEXT: s_bitset0_b32 s59, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s54, s59 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s55, v23, 16 -; GFX11-NEXT: v_readlane_b32 s54, v23, 15 -; GFX11-NEXT: v_readlane_b32 s53, v23, 14 -; GFX11-NEXT: v_readlane_b32 s52, v23, 13 -; GFX11-NEXT: v_readlane_b32 s51, v23, 12 -; GFX11-NEXT: v_readlane_b32 s50, v23, 11 -; GFX11-NEXT: v_readlane_b32 s49, v23, 10 -; GFX11-NEXT: v_readlane_b32 s48, v23, 9 -; GFX11-NEXT: v_readlane_b32 s39, v23, 8 -; GFX11-NEXT: v_readlane_b32 s38, v23, 7 
-; GFX11-NEXT: v_readlane_b32 s37, v23, 6 -; GFX11-NEXT: v_readlane_b32 s36, v23, 5 -; GFX11-NEXT: v_readlane_b32 s35, v23, 4 -; GFX11-NEXT: v_readlane_b32 s34, v23, 3 -; GFX11-NEXT: v_readlane_b32 s33, v23, 2 -; GFX11-NEXT: v_readlane_b32 s31, v23, 1 -; GFX11-NEXT: v_readlane_b32 s30, v23, 0 +; GFX11-NEXT: v_readlane_b32 s30, v23, 15 +; GFX11-NEXT: v_readlane_b32 s31, v23, 16 +; GFX11-NEXT: v_readlane_b32 s55, v23, 14 +; GFX11-NEXT: v_readlane_b32 s54, v23, 13 +; GFX11-NEXT: v_readlane_b32 s53, v23, 12 +; GFX11-NEXT: v_readlane_b32 s52, v23, 11 +; GFX11-NEXT: v_readlane_b32 s51, v23, 10 +; GFX11-NEXT: v_readlane_b32 s50, v23, 9 +; GFX11-NEXT: v_readlane_b32 s49, v23, 8 +; GFX11-NEXT: v_readlane_b32 s48, v23, 7 +; GFX11-NEXT: v_readlane_b32 s39, v23, 6 +; GFX11-NEXT: v_readlane_b32 s38, v23, 5 +; GFX11-NEXT: v_readlane_b32 s37, v23, 4 +; GFX11-NEXT: v_readlane_b32 s36, v23, 3 +; GFX11-NEXT: v_readlane_b32 s35, v23, 2 +; GFX11-NEXT: v_readlane_b32 s34, v23, 1 +; GFX11-NEXT: v_readlane_b32 s33, v23, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 ; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload @@ -505,28 +506,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v23, s30, 0 +; GFX12-NEXT: v_writelane_b32 v23, s33, 0 +; GFX12-NEXT: v_writelane_b32 v23, s34, 1 +; GFX12-NEXT: v_writelane_b32 v23, s35, 2 +; GFX12-NEXT: v_writelane_b32 v23, s36, 3 +; GFX12-NEXT: v_writelane_b32 v23, s37, 4 +; GFX12-NEXT: v_writelane_b32 v23, s38, 5 +; GFX12-NEXT: v_writelane_b32 v23, s39, 6 +; GFX12-NEXT: v_writelane_b32 v23, s48, 7 +; GFX12-NEXT: v_writelane_b32 v23, s49, 8 +; GFX12-NEXT: v_writelane_b32 v23, s50, 9 +; GFX12-NEXT: v_writelane_b32 v23, s51, 10 +; GFX12-NEXT: v_writelane_b32 v23, s52, 11 +; GFX12-NEXT: 
v_writelane_b32 v23, s53, 12 +; GFX12-NEXT: v_writelane_b32 v23, s54, 13 +; GFX12-NEXT: v_writelane_b32 v23, s55, 14 +; GFX12-NEXT: v_writelane_b32 v23, s30, 15 +; GFX12-NEXT: v_writelane_b32 v23, s31, 16 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v23, s31, 1 -; GFX12-NEXT: v_writelane_b32 v23, s33, 2 -; GFX12-NEXT: v_writelane_b32 v23, s34, 3 -; GFX12-NEXT: v_writelane_b32 v23, s35, 4 -; GFX12-NEXT: v_writelane_b32 v23, s36, 5 -; GFX12-NEXT: v_writelane_b32 v23, s37, 6 -; GFX12-NEXT: v_writelane_b32 v23, s38, 7 -; GFX12-NEXT: v_writelane_b32 v23, s39, 8 -; GFX12-NEXT: v_writelane_b32 v23, s48, 9 -; GFX12-NEXT: v_writelane_b32 v23, s49, 10 -; GFX12-NEXT: v_writelane_b32 v23, s50, 11 -; GFX12-NEXT: v_writelane_b32 v23, s51, 12 -; GFX12-NEXT: v_writelane_b32 v23, s52, 13 -; GFX12-NEXT: v_writelane_b32 v23, s53, 14 -; GFX12-NEXT: v_writelane_b32 v23, s54, 15 -; GFX12-NEXT: v_writelane_b32 v23, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX12-NEXT: ;;#ASMEND @@ -540,23 +541,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s55, v23, 16 -; GFX12-NEXT: v_readlane_b32 s54, v23, 15 -; GFX12-NEXT: v_readlane_b32 s53, v23, 14 -; GFX12-NEXT: v_readlane_b32 s52, v23, 13 -; GFX12-NEXT: v_readlane_b32 s51, v23, 12 -; GFX12-NEXT: v_readlane_b32 s50, v23, 11 -; GFX12-NEXT: v_readlane_b32 s49, v23, 10 -; GFX12-NEXT: v_readlane_b32 s48, v23, 9 -; GFX12-NEXT: v_readlane_b32 s39, v23, 8 -; GFX12-NEXT: v_readlane_b32 s38, v23, 7 -; GFX12-NEXT: v_readlane_b32 s37, v23, 6 -; GFX12-NEXT: v_readlane_b32 s36, v23, 5 -; GFX12-NEXT: 
v_readlane_b32 s35, v23, 4 -; GFX12-NEXT: v_readlane_b32 s34, v23, 3 -; GFX12-NEXT: v_readlane_b32 s33, v23, 2 -; GFX12-NEXT: v_readlane_b32 s31, v23, 1 -; GFX12-NEXT: v_readlane_b32 s30, v23, 0 +; GFX12-NEXT: v_readlane_b32 s30, v23, 15 +; GFX12-NEXT: v_readlane_b32 s31, v23, 16 +; GFX12-NEXT: v_readlane_b32 s55, v23, 14 +; GFX12-NEXT: v_readlane_b32 s54, v23, 13 +; GFX12-NEXT: v_readlane_b32 s53, v23, 12 +; GFX12-NEXT: v_readlane_b32 s52, v23, 11 +; GFX12-NEXT: v_readlane_b32 s51, v23, 10 +; GFX12-NEXT: v_readlane_b32 s50, v23, 9 +; GFX12-NEXT: v_readlane_b32 s49, v23, 8 +; GFX12-NEXT: v_readlane_b32 s48, v23, 7 +; GFX12-NEXT: v_readlane_b32 s39, v23, 6 +; GFX12-NEXT: v_readlane_b32 s38, v23, 5 +; GFX12-NEXT: v_readlane_b32 s37, v23, 4 +; GFX12-NEXT: v_readlane_b32 s36, v23, 3 +; GFX12-NEXT: v_readlane_b32 s35, v23, 2 +; GFX12-NEXT: v_readlane_b32 s34, v23, 1 +; GFX12-NEXT: v_readlane_b32 s33, v23, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -613,24 +614,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v21, s30, 0 -; GFX7-NEXT: v_writelane_b32 v21, s31, 1 -; GFX7-NEXT: v_writelane_b32 v21, s33, 2 -; GFX7-NEXT: v_writelane_b32 v21, s34, 3 -; GFX7-NEXT: v_writelane_b32 v21, s35, 4 -; GFX7-NEXT: v_writelane_b32 v21, s36, 5 -; GFX7-NEXT: v_writelane_b32 v21, s37, 6 -; GFX7-NEXT: v_writelane_b32 v21, s38, 7 -; GFX7-NEXT: v_writelane_b32 v21, s39, 8 -; GFX7-NEXT: v_writelane_b32 v21, s48, 9 -; GFX7-NEXT: v_writelane_b32 v21, s49, 10 -; GFX7-NEXT: v_writelane_b32 v21, s50, 11 -; GFX7-NEXT: v_writelane_b32 v21, s51, 12 -; GFX7-NEXT: v_writelane_b32 v21, s52, 13 -; GFX7-NEXT: v_writelane_b32 v21, s53, 14 -; GFX7-NEXT: v_writelane_b32 v21, s54, 
15 +; GFX7-NEXT: v_writelane_b32 v21, s33, 0 +; GFX7-NEXT: v_writelane_b32 v21, s34, 1 +; GFX7-NEXT: v_writelane_b32 v21, s35, 2 +; GFX7-NEXT: v_writelane_b32 v21, s36, 3 +; GFX7-NEXT: v_writelane_b32 v21, s37, 4 +; GFX7-NEXT: v_writelane_b32 v21, s38, 5 +; GFX7-NEXT: v_writelane_b32 v21, s39, 6 +; GFX7-NEXT: v_writelane_b32 v21, s48, 7 +; GFX7-NEXT: v_writelane_b32 v21, s49, 8 +; GFX7-NEXT: v_writelane_b32 v21, s50, 9 +; GFX7-NEXT: v_writelane_b32 v21, s51, 10 +; GFX7-NEXT: v_writelane_b32 v21, s52, 11 +; GFX7-NEXT: v_writelane_b32 v21, s53, 12 +; GFX7-NEXT: v_writelane_b32 v21, s54, 13 +; GFX7-NEXT: v_writelane_b32 v21, s55, 14 +; GFX7-NEXT: v_writelane_b32 v21, s30, 15 +; GFX7-NEXT: v_writelane_b32 v21, s31, 16 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v21, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX7-NEXT: ;;#ASMEND @@ -640,23 +641,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v21, 16 -; GFX7-NEXT: v_readlane_b32 s54, v21, 15 -; GFX7-NEXT: v_readlane_b32 s53, v21, 14 -; GFX7-NEXT: v_readlane_b32 s52, v21, 13 -; GFX7-NEXT: v_readlane_b32 s51, v21, 12 -; GFX7-NEXT: v_readlane_b32 s50, v21, 11 -; GFX7-NEXT: v_readlane_b32 s49, v21, 10 -; GFX7-NEXT: v_readlane_b32 s48, v21, 9 -; GFX7-NEXT: v_readlane_b32 s39, v21, 8 -; GFX7-NEXT: v_readlane_b32 s38, v21, 7 -; GFX7-NEXT: v_readlane_b32 s37, v21, 6 -; GFX7-NEXT: v_readlane_b32 s36, v21, 5 -; GFX7-NEXT: v_readlane_b32 s35, v21, 4 -; GFX7-NEXT: v_readlane_b32 s34, v21, 3 -; GFX7-NEXT: v_readlane_b32 s33, v21, 2 -; GFX7-NEXT: v_readlane_b32 s31, v21, 1 -; GFX7-NEXT: v_readlane_b32 s30, v21, 0 +; GFX7-NEXT: v_readlane_b32 s30, v21, 15 +; GFX7-NEXT: v_readlane_b32 s31, v21, 16 +; 
GFX7-NEXT: v_readlane_b32 s55, v21, 14 +; GFX7-NEXT: v_readlane_b32 s54, v21, 13 +; GFX7-NEXT: v_readlane_b32 s53, v21, 12 +; GFX7-NEXT: v_readlane_b32 s52, v21, 11 +; GFX7-NEXT: v_readlane_b32 s51, v21, 10 +; GFX7-NEXT: v_readlane_b32 s50, v21, 9 +; GFX7-NEXT: v_readlane_b32 s49, v21, 8 +; GFX7-NEXT: v_readlane_b32 s48, v21, 7 +; GFX7-NEXT: v_readlane_b32 s39, v21, 6 +; GFX7-NEXT: v_readlane_b32 s38, v21, 5 +; GFX7-NEXT: v_readlane_b32 s37, v21, 4 +; GFX7-NEXT: v_readlane_b32 s36, v21, 3 +; GFX7-NEXT: v_readlane_b32 s35, v21, 2 +; GFX7-NEXT: v_readlane_b32 s34, v21, 1 +; GFX7-NEXT: v_readlane_b32 s33, v21, 0 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -671,24 +672,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v21, s30, 0 -; GFX8-NEXT: v_writelane_b32 v21, s31, 1 -; GFX8-NEXT: v_writelane_b32 v21, s33, 2 -; GFX8-NEXT: v_writelane_b32 v21, s34, 3 -; GFX8-NEXT: v_writelane_b32 v21, s35, 4 -; GFX8-NEXT: v_writelane_b32 v21, s36, 5 -; GFX8-NEXT: v_writelane_b32 v21, s37, 6 -; GFX8-NEXT: v_writelane_b32 v21, s38, 7 -; GFX8-NEXT: v_writelane_b32 v21, s39, 8 -; GFX8-NEXT: v_writelane_b32 v21, s48, 9 -; GFX8-NEXT: v_writelane_b32 v21, s49, 10 -; GFX8-NEXT: v_writelane_b32 v21, s50, 11 -; GFX8-NEXT: v_writelane_b32 v21, s51, 12 -; GFX8-NEXT: v_writelane_b32 v21, s52, 13 -; GFX8-NEXT: v_writelane_b32 v21, s53, 14 -; GFX8-NEXT: v_writelane_b32 v21, s54, 15 +; GFX8-NEXT: v_writelane_b32 v21, s33, 0 +; GFX8-NEXT: v_writelane_b32 v21, s34, 1 +; GFX8-NEXT: v_writelane_b32 v21, s35, 2 +; GFX8-NEXT: v_writelane_b32 v21, s36, 3 +; GFX8-NEXT: v_writelane_b32 v21, s37, 4 +; GFX8-NEXT: v_writelane_b32 v21, s38, 5 +; GFX8-NEXT: v_writelane_b32 v21, 
s39, 6 +; GFX8-NEXT: v_writelane_b32 v21, s48, 7 +; GFX8-NEXT: v_writelane_b32 v21, s49, 8 +; GFX8-NEXT: v_writelane_b32 v21, s50, 9 +; GFX8-NEXT: v_writelane_b32 v21, s51, 10 +; GFX8-NEXT: v_writelane_b32 v21, s52, 11 +; GFX8-NEXT: v_writelane_b32 v21, s53, 12 +; GFX8-NEXT: v_writelane_b32 v21, s54, 13 +; GFX8-NEXT: v_writelane_b32 v21, s55, 14 +; GFX8-NEXT: v_writelane_b32 v21, s30, 15 +; GFX8-NEXT: v_writelane_b32 v21, s31, 16 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v21, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND @@ -699,23 +700,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v21, 16 -; GFX8-NEXT: v_readlane_b32 s54, v21, 15 -; GFX8-NEXT: v_readlane_b32 s53, v21, 14 -; GFX8-NEXT: v_readlane_b32 s52, v21, 13 -; GFX8-NEXT: v_readlane_b32 s51, v21, 12 -; GFX8-NEXT: v_readlane_b32 s50, v21, 11 -; GFX8-NEXT: v_readlane_b32 s49, v21, 10 -; GFX8-NEXT: v_readlane_b32 s48, v21, 9 -; GFX8-NEXT: v_readlane_b32 s39, v21, 8 -; GFX8-NEXT: v_readlane_b32 s38, v21, 7 -; GFX8-NEXT: v_readlane_b32 s37, v21, 6 -; GFX8-NEXT: v_readlane_b32 s36, v21, 5 -; GFX8-NEXT: v_readlane_b32 s35, v21, 4 -; GFX8-NEXT: v_readlane_b32 s34, v21, 3 -; GFX8-NEXT: v_readlane_b32 s33, v21, 2 -; GFX8-NEXT: v_readlane_b32 s31, v21, 1 -; GFX8-NEXT: v_readlane_b32 s30, v21, 0 +; GFX8-NEXT: v_readlane_b32 s30, v21, 15 +; GFX8-NEXT: v_readlane_b32 s31, v21, 16 +; GFX8-NEXT: v_readlane_b32 s55, v21, 14 +; GFX8-NEXT: v_readlane_b32 s54, v21, 13 +; GFX8-NEXT: v_readlane_b32 s53, v21, 12 +; GFX8-NEXT: v_readlane_b32 s52, v21, 11 +; GFX8-NEXT: v_readlane_b32 s51, v21, 10 +; GFX8-NEXT: v_readlane_b32 s50, v21, 9 +; GFX8-NEXT: v_readlane_b32 s49, v21, 8 +; 
GFX8-NEXT: v_readlane_b32 s48, v21, 7 +; GFX8-NEXT: v_readlane_b32 s39, v21, 6 +; GFX8-NEXT: v_readlane_b32 s38, v21, 5 +; GFX8-NEXT: v_readlane_b32 s37, v21, 4 +; GFX8-NEXT: v_readlane_b32 s36, v21, 3 +; GFX8-NEXT: v_readlane_b32 s35, v21, 2 +; GFX8-NEXT: v_readlane_b32 s34, v21, 1 +; GFX8-NEXT: v_readlane_b32 s33, v21, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -730,24 +731,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v21, s30, 0 -; GFX900-NEXT: v_writelane_b32 v21, s31, 1 -; GFX900-NEXT: v_writelane_b32 v21, s33, 2 -; GFX900-NEXT: v_writelane_b32 v21, s34, 3 -; GFX900-NEXT: v_writelane_b32 v21, s35, 4 -; GFX900-NEXT: v_writelane_b32 v21, s36, 5 -; GFX900-NEXT: v_writelane_b32 v21, s37, 6 -; GFX900-NEXT: v_writelane_b32 v21, s38, 7 -; GFX900-NEXT: v_writelane_b32 v21, s39, 8 -; GFX900-NEXT: v_writelane_b32 v21, s48, 9 -; GFX900-NEXT: v_writelane_b32 v21, s49, 10 -; GFX900-NEXT: v_writelane_b32 v21, s50, 11 -; GFX900-NEXT: v_writelane_b32 v21, s51, 12 -; GFX900-NEXT: v_writelane_b32 v21, s52, 13 -; GFX900-NEXT: v_writelane_b32 v21, s53, 14 -; GFX900-NEXT: v_writelane_b32 v21, s54, 15 +; GFX900-NEXT: v_writelane_b32 v21, s33, 0 +; GFX900-NEXT: v_writelane_b32 v21, s34, 1 +; GFX900-NEXT: v_writelane_b32 v21, s35, 2 +; GFX900-NEXT: v_writelane_b32 v21, s36, 3 +; GFX900-NEXT: v_writelane_b32 v21, s37, 4 +; GFX900-NEXT: v_writelane_b32 v21, s38, 5 +; GFX900-NEXT: v_writelane_b32 v21, s39, 6 +; GFX900-NEXT: v_writelane_b32 v21, s48, 7 +; GFX900-NEXT: v_writelane_b32 v21, s49, 8 +; GFX900-NEXT: v_writelane_b32 v21, s50, 9 +; GFX900-NEXT: v_writelane_b32 v21, s51, 10 +; GFX900-NEXT: v_writelane_b32 v21, s52, 11 +; 
GFX900-NEXT: v_writelane_b32 v21, s53, 12 +; GFX900-NEXT: v_writelane_b32 v21, s54, 13 +; GFX900-NEXT: v_writelane_b32 v21, s55, 14 +; GFX900-NEXT: v_writelane_b32 v21, s30, 15 +; GFX900-NEXT: v_writelane_b32 v21, s31, 16 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v21, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND @@ -758,23 +759,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v21, 16 -; GFX900-NEXT: v_readlane_b32 s54, v21, 15 -; GFX900-NEXT: v_readlane_b32 s53, v21, 14 -; GFX900-NEXT: v_readlane_b32 s52, v21, 13 -; GFX900-NEXT: v_readlane_b32 s51, v21, 12 -; GFX900-NEXT: v_readlane_b32 s50, v21, 11 -; GFX900-NEXT: v_readlane_b32 s49, v21, 10 -; GFX900-NEXT: v_readlane_b32 s48, v21, 9 -; GFX900-NEXT: v_readlane_b32 s39, v21, 8 -; GFX900-NEXT: v_readlane_b32 s38, v21, 7 -; GFX900-NEXT: v_readlane_b32 s37, v21, 6 -; GFX900-NEXT: v_readlane_b32 s36, v21, 5 -; GFX900-NEXT: v_readlane_b32 s35, v21, 4 -; GFX900-NEXT: v_readlane_b32 s34, v21, 3 -; GFX900-NEXT: v_readlane_b32 s33, v21, 2 -; GFX900-NEXT: v_readlane_b32 s31, v21, 1 -; GFX900-NEXT: v_readlane_b32 s30, v21, 0 +; GFX900-NEXT: v_readlane_b32 s30, v21, 15 +; GFX900-NEXT: v_readlane_b32 s31, v21, 16 +; GFX900-NEXT: v_readlane_b32 s55, v21, 14 +; GFX900-NEXT: v_readlane_b32 s54, v21, 13 +; GFX900-NEXT: v_readlane_b32 s53, v21, 12 +; GFX900-NEXT: v_readlane_b32 s52, v21, 11 +; GFX900-NEXT: v_readlane_b32 s51, v21, 10 +; GFX900-NEXT: v_readlane_b32 s50, v21, 9 +; GFX900-NEXT: v_readlane_b32 s49, v21, 8 +; GFX900-NEXT: v_readlane_b32 s48, v21, 7 +; GFX900-NEXT: v_readlane_b32 s39, v21, 6 +; GFX900-NEXT: v_readlane_b32 s38, v21, 5 +; GFX900-NEXT: 
v_readlane_b32 s37, v21, 4 +; GFX900-NEXT: v_readlane_b32 s36, v21, 3 +; GFX900-NEXT: v_readlane_b32 s35, v21, 2 +; GFX900-NEXT: v_readlane_b32 s34, v21, 1 +; GFX900-NEXT: v_readlane_b32 s33, v21, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 ; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload @@ -789,24 +790,25 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v21, s30, 0 -; GFX942-NEXT: v_writelane_b32 v21, s31, 1 -; GFX942-NEXT: v_writelane_b32 v21, s33, 2 -; GFX942-NEXT: v_writelane_b32 v21, s34, 3 -; GFX942-NEXT: v_writelane_b32 v21, s35, 4 -; GFX942-NEXT: v_writelane_b32 v21, s36, 5 -; GFX942-NEXT: v_writelane_b32 v21, s37, 6 -; GFX942-NEXT: v_writelane_b32 v21, s38, 7 -; GFX942-NEXT: v_writelane_b32 v21, s39, 8 -; GFX942-NEXT: v_writelane_b32 v21, s48, 9 -; GFX942-NEXT: v_writelane_b32 v21, s49, 10 -; GFX942-NEXT: v_writelane_b32 v21, s50, 11 -; GFX942-NEXT: v_writelane_b32 v21, s51, 12 -; GFX942-NEXT: v_writelane_b32 v21, s52, 13 -; GFX942-NEXT: v_writelane_b32 v21, s53, 14 -; GFX942-NEXT: v_writelane_b32 v21, s54, 15 +; GFX942-NEXT: v_writelane_b32 v21, s33, 0 +; GFX942-NEXT: v_writelane_b32 v21, s34, 1 +; GFX942-NEXT: v_writelane_b32 v21, s35, 2 +; GFX942-NEXT: v_writelane_b32 v21, s36, 3 +; GFX942-NEXT: v_writelane_b32 v21, s37, 4 +; GFX942-NEXT: v_writelane_b32 v21, s38, 5 +; GFX942-NEXT: v_writelane_b32 v21, s39, 6 +; GFX942-NEXT: v_writelane_b32 v21, s48, 7 +; GFX942-NEXT: v_writelane_b32 v21, s49, 8 +; GFX942-NEXT: v_writelane_b32 v21, s50, 9 +; GFX942-NEXT: v_writelane_b32 v21, s51, 10 +; GFX942-NEXT: v_writelane_b32 v21, s52, 11 +; GFX942-NEXT: v_writelane_b32 v21, s53, 12 +; GFX942-NEXT: v_writelane_b32 v21, s54, 13 +; GFX942-NEXT: v_writelane_b32 v21, s55, 14 +; 
GFX942-NEXT: v_writelane_b32 v21, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v21, s31, 16 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec -; GFX942-NEXT: v_writelane_b32 v21, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND @@ -818,23 +820,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v21, 16 -; GFX942-NEXT: v_readlane_b32 s54, v21, 15 -; GFX942-NEXT: v_readlane_b32 s53, v21, 14 -; GFX942-NEXT: v_readlane_b32 s52, v21, 13 -; GFX942-NEXT: v_readlane_b32 s51, v21, 12 -; GFX942-NEXT: v_readlane_b32 s50, v21, 11 -; GFX942-NEXT: v_readlane_b32 s49, v21, 10 -; GFX942-NEXT: v_readlane_b32 s48, v21, 9 -; GFX942-NEXT: v_readlane_b32 s39, v21, 8 -; GFX942-NEXT: v_readlane_b32 s38, v21, 7 -; GFX942-NEXT: v_readlane_b32 s37, v21, 6 -; GFX942-NEXT: v_readlane_b32 s36, v21, 5 -; GFX942-NEXT: v_readlane_b32 s35, v21, 4 -; GFX942-NEXT: v_readlane_b32 s34, v21, 3 -; GFX942-NEXT: v_readlane_b32 s33, v21, 2 -; GFX942-NEXT: v_readlane_b32 s31, v21, 1 -; GFX942-NEXT: v_readlane_b32 s30, v21, 0 +; GFX942-NEXT: v_readlane_b32 s30, v21, 15 +; GFX942-NEXT: v_readlane_b32 s31, v21, 16 +; GFX942-NEXT: v_readlane_b32 s55, v21, 14 +; GFX942-NEXT: v_readlane_b32 s54, v21, 13 +; GFX942-NEXT: v_readlane_b32 s53, v21, 12 +; GFX942-NEXT: v_readlane_b32 s52, v21, 11 +; GFX942-NEXT: v_readlane_b32 s51, v21, 10 +; GFX942-NEXT: v_readlane_b32 s50, v21, 9 +; GFX942-NEXT: v_readlane_b32 s49, v21, 8 +; GFX942-NEXT: v_readlane_b32 s48, v21, 7 +; GFX942-NEXT: v_readlane_b32 s39, v21, 6 +; GFX942-NEXT: v_readlane_b32 s38, v21, 5 +; GFX942-NEXT: v_readlane_b32 s37, v21, 4 +; GFX942-NEXT: v_readlane_b32 s36, v21, 3 +; GFX942-NEXT: v_readlane_b32 s35, v21, 
2 +; GFX942-NEXT: v_readlane_b32 s34, v21, 1 +; GFX942-NEXT: v_readlane_b32 s33, v21, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 ; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload @@ -850,51 +852,51 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 -; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo -; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 -; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 -; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 -; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16 +; GFX10_1-NEXT: v_writelane_b32 v21, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v21, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 v21, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v21, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v21, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v21, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v21, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v21, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v21, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v21, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v21, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v21, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v21, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v21, s54, 13 +; GFX10_1-NEXT: v_writelane_b32 v21, s55, 14 
+; GFX10_1-NEXT: v_writelane_b32 v21, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v21, s31, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 -; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v21, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v21, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v21, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v21, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v21, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v21, 11 +; GFX10_1-NEXT: v_readlane_b32 s51, v21, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v21, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v21, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v21, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v21, 6 +; 
GFX10_1-NEXT: v_readlane_b32 s38, v21, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v21, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v21, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v21, 2 +; GFX10_1-NEXT: v_readlane_b32 s34, v21, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v21, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload @@ -910,51 +912,51 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 -; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo -; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 -; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16 +; GFX10_3-NEXT: v_writelane_b32 v21, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v21, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v21, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v21, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v21, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v21, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v21, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v21, s48, 7 +; GFX10_3-NEXT: v_writelane_b32 v21, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v21, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v21, s51, 10 +; 
GFX10_3-NEXT: v_writelane_b32 v21, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v21, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v21, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v21, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v21, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v21, s31, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 -; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v21, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v21, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v21, 14 +; GFX10_3-NEXT: v_readlane_b32 s54, v21, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v21, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v21, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v21, 10 
+; GFX10_3-NEXT: v_readlane_b32 s50, v21, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v21, 8 +; GFX10_3-NEXT: v_readlane_b32 s48, v21, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v21, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v21, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v21, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v21, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v21, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v21, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v21, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 ; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload @@ -969,24 +971,24 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v21, s30, 0 +; GFX11-NEXT: v_writelane_b32 v21, s33, 0 +; GFX11-NEXT: v_writelane_b32 v21, s34, 1 +; GFX11-NEXT: v_writelane_b32 v21, s35, 2 +; GFX11-NEXT: v_writelane_b32 v21, s36, 3 +; GFX11-NEXT: v_writelane_b32 v21, s37, 4 +; GFX11-NEXT: v_writelane_b32 v21, s38, 5 +; GFX11-NEXT: v_writelane_b32 v21, s39, 6 +; GFX11-NEXT: v_writelane_b32 v21, s48, 7 +; GFX11-NEXT: v_writelane_b32 v21, s49, 8 +; GFX11-NEXT: v_writelane_b32 v21, s50, 9 +; GFX11-NEXT: v_writelane_b32 v21, s51, 10 +; GFX11-NEXT: v_writelane_b32 v21, s52, 11 +; GFX11-NEXT: v_writelane_b32 v21, s53, 12 +; GFX11-NEXT: v_writelane_b32 v21, s54, 13 +; GFX11-NEXT: v_writelane_b32 v21, s55, 14 +; GFX11-NEXT: v_writelane_b32 v21, s30, 15 +; GFX11-NEXT: v_writelane_b32 v21, s31, 16 ; GFX11-NEXT: s_and_b32 s59, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v21, s31, 1 -; GFX11-NEXT: v_writelane_b32 v21, s33, 2 -; GFX11-NEXT: v_writelane_b32 v21, s34, 3 -; GFX11-NEXT: v_writelane_b32 v21, s35, 4 -; GFX11-NEXT: v_writelane_b32 v21, s36, 5 -; GFX11-NEXT: v_writelane_b32 v21, s37, 6 -; GFX11-NEXT: v_writelane_b32 v21, s38, 7 -; GFX11-NEXT: 
v_writelane_b32 v21, s39, 8 -; GFX11-NEXT: v_writelane_b32 v21, s48, 9 -; GFX11-NEXT: v_writelane_b32 v21, s49, 10 -; GFX11-NEXT: v_writelane_b32 v21, s50, 11 -; GFX11-NEXT: v_writelane_b32 v21, s51, 12 -; GFX11-NEXT: v_writelane_b32 v21, s52, 13 -; GFX11-NEXT: v_writelane_b32 v21, s53, 14 -; GFX11-NEXT: v_writelane_b32 v21, s54, 15 -; GFX11-NEXT: v_writelane_b32 v21, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND @@ -999,23 +1001,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s55, v21, 16 -; GFX11-NEXT: v_readlane_b32 s54, v21, 15 -; GFX11-NEXT: v_readlane_b32 s53, v21, 14 -; GFX11-NEXT: v_readlane_b32 s52, v21, 13 -; GFX11-NEXT: v_readlane_b32 s51, v21, 12 -; GFX11-NEXT: v_readlane_b32 s50, v21, 11 -; GFX11-NEXT: v_readlane_b32 s49, v21, 10 -; GFX11-NEXT: v_readlane_b32 s48, v21, 9 -; GFX11-NEXT: v_readlane_b32 s39, v21, 8 -; GFX11-NEXT: v_readlane_b32 s38, v21, 7 -; GFX11-NEXT: v_readlane_b32 s37, v21, 6 -; GFX11-NEXT: v_readlane_b32 s36, v21, 5 -; GFX11-NEXT: v_readlane_b32 s35, v21, 4 -; GFX11-NEXT: v_readlane_b32 s34, v21, 3 -; GFX11-NEXT: v_readlane_b32 s33, v21, 2 -; GFX11-NEXT: v_readlane_b32 s31, v21, 1 -; GFX11-NEXT: v_readlane_b32 s30, v21, 0 +; GFX11-NEXT: v_readlane_b32 s30, v21, 15 +; GFX11-NEXT: v_readlane_b32 s31, v21, 16 +; GFX11-NEXT: v_readlane_b32 s55, v21, 14 +; GFX11-NEXT: v_readlane_b32 s54, v21, 13 +; GFX11-NEXT: v_readlane_b32 s53, v21, 12 +; GFX11-NEXT: v_readlane_b32 s52, v21, 11 +; GFX11-NEXT: v_readlane_b32 s51, v21, 10 +; GFX11-NEXT: v_readlane_b32 s50, v21, 9 +; GFX11-NEXT: v_readlane_b32 s49, v21, 8 +; GFX11-NEXT: v_readlane_b32 s48, v21, 7 +; GFX11-NEXT: v_readlane_b32 s39, v21, 6 +; GFX11-NEXT: 
v_readlane_b32 s38, v21, 5 +; GFX11-NEXT: v_readlane_b32 s37, v21, 4 +; GFX11-NEXT: v_readlane_b32 s36, v21, 3 +; GFX11-NEXT: v_readlane_b32 s35, v21, 2 +; GFX11-NEXT: v_readlane_b32 s34, v21, 1 +; GFX11-NEXT: v_readlane_b32 s33, v21, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 ; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload @@ -1034,50 +1036,49 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v21, s30, 0 -; GFX12-NEXT: s_and_b32 s59, 0, exec_lo -; GFX12-NEXT: v_writelane_b32 v21, s31, 1 -; GFX12-NEXT: v_writelane_b32 v21, s33, 2 -; GFX12-NEXT: v_writelane_b32 v21, s34, 3 -; GFX12-NEXT: v_writelane_b32 v21, s35, 4 -; GFX12-NEXT: v_writelane_b32 v21, s36, 5 -; GFX12-NEXT: v_writelane_b32 v21, s37, 6 -; GFX12-NEXT: v_writelane_b32 v21, s38, 7 -; GFX12-NEXT: v_writelane_b32 v21, s39, 8 -; GFX12-NEXT: v_writelane_b32 v21, s48, 9 -; GFX12-NEXT: v_writelane_b32 v21, s49, 10 -; GFX12-NEXT: v_writelane_b32 v21, s50, 11 -; GFX12-NEXT: v_writelane_b32 v21, s51, 12 -; GFX12-NEXT: v_writelane_b32 v21, s52, 13 -; GFX12-NEXT: v_writelane_b32 v21, s53, 14 -; GFX12-NEXT: v_writelane_b32 v21, s54, 15 -; GFX12-NEXT: v_writelane_b32 v21, s55, 16 +; GFX12-NEXT: v_writelane_b32 v21, s33, 0 +; GFX12-NEXT: v_writelane_b32 v21, s34, 1 +; GFX12-NEXT: v_writelane_b32 v21, s35, 2 +; GFX12-NEXT: v_writelane_b32 v21, s36, 3 +; GFX12-NEXT: v_writelane_b32 v21, s37, 4 +; GFX12-NEXT: v_writelane_b32 v21, s38, 5 +; GFX12-NEXT: v_writelane_b32 v21, s39, 6 +; GFX12-NEXT: v_writelane_b32 v21, s48, 7 +; GFX12-NEXT: v_writelane_b32 v21, s49, 8 +; GFX12-NEXT: v_writelane_b32 v21, s50, 9 +; GFX12-NEXT: v_writelane_b32 v21, s51, 10 +; GFX12-NEXT: v_writelane_b32 v21, s52, 11 +; GFX12-NEXT: v_writelane_b32 v21, s53, 12 +; GFX12-NEXT: 
v_writelane_b32 v21, s54, 13 +; GFX12-NEXT: v_writelane_b32 v21, s55, 14 +; GFX12-NEXT: v_writelane_b32 v21, s30, 15 +; GFX12-NEXT: v_writelane_b32 v21, s31, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX12-NEXT: s_and_b32 s59, 0, exec_lo ; GFX12-NEXT: s_mov_b32 s54, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_readlane_b32 s55, v21, 16 -; GFX12-NEXT: v_readlane_b32 s54, v21, 15 -; GFX12-NEXT: v_readlane_b32 s53, v21, 14 -; GFX12-NEXT: v_readlane_b32 s52, v21, 13 -; GFX12-NEXT: v_readlane_b32 s51, v21, 12 -; GFX12-NEXT: v_readlane_b32 s50, v21, 11 -; GFX12-NEXT: v_readlane_b32 s49, v21, 10 -; GFX12-NEXT: v_readlane_b32 s48, v21, 9 -; GFX12-NEXT: v_readlane_b32 s39, v21, 8 -; GFX12-NEXT: v_readlane_b32 s38, v21, 7 -; GFX12-NEXT: v_readlane_b32 s37, v21, 6 -; GFX12-NEXT: v_readlane_b32 s36, v21, 5 -; GFX12-NEXT: v_readlane_b32 s35, v21, 4 -; GFX12-NEXT: v_readlane_b32 s34, v21, 3 -; GFX12-NEXT: v_readlane_b32 s33, v21, 2 -; GFX12-NEXT: v_readlane_b32 s31, v21, 1 -; GFX12-NEXT: v_readlane_b32 s30, v21, 0 +; GFX12-NEXT: v_readlane_b32 s30, v21, 15 +; GFX12-NEXT: v_readlane_b32 s31, v21, 16 +; GFX12-NEXT: v_readlane_b32 s55, v21, 14 +; GFX12-NEXT: v_readlane_b32 s54, v21, 13 +; GFX12-NEXT: v_readlane_b32 s53, v21, 12 +; GFX12-NEXT: v_readlane_b32 s52, v21, 11 +; GFX12-NEXT: v_readlane_b32 s51, v21, 10 +; GFX12-NEXT: v_readlane_b32 s50, v21, 9 +; GFX12-NEXT: v_readlane_b32 s49, v21, 8 +; GFX12-NEXT: v_readlane_b32 s48, v21, 7 +; GFX12-NEXT: v_readlane_b32 s39, v21, 6 +; GFX12-NEXT: v_readlane_b32 s38, v21, 5 +; GFX12-NEXT: 
v_readlane_b32 s37, v21, 4 +; GFX12-NEXT: v_readlane_b32 s36, v21, 3 +; GFX12-NEXT: v_readlane_b32 s35, v21, 2 +; GFX12-NEXT: v_readlane_b32 s34, v21, 1 +; GFX12-NEXT: v_readlane_b32 s33, v21, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe @@ -1135,30 +1136,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_writelane_b32 v23, s28, 17 ; GFX7-NEXT: v_writelane_b32 v23, s29, 18 -; GFX7-NEXT: v_writelane_b32 v23, s30, 0 -; GFX7-NEXT: v_writelane_b32 v23, s31, 1 -; GFX7-NEXT: v_writelane_b32 v23, s33, 2 -; GFX7-NEXT: v_writelane_b32 v23, s34, 3 -; GFX7-NEXT: v_writelane_b32 v23, s35, 4 -; GFX7-NEXT: v_writelane_b32 v23, s36, 5 -; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: v_writelane_b32 v23, s48, 9 -; GFX7-NEXT: v_writelane_b32 v23, s49, 10 -; GFX7-NEXT: v_writelane_b32 v23, s50, 11 -; GFX7-NEXT: v_writelane_b32 v23, s51, 12 -; GFX7-NEXT: v_writelane_b32 v23, s52, 13 +; GFX7-NEXT: v_writelane_b32 v23, s33, 0 +; GFX7-NEXT: v_writelane_b32 v23, s34, 1 +; GFX7-NEXT: v_writelane_b32 v23, s35, 2 +; GFX7-NEXT: v_writelane_b32 v23, s36, 3 +; GFX7-NEXT: v_writelane_b32 v23, s37, 4 +; GFX7-NEXT: v_writelane_b32 v23, s38, 5 +; GFX7-NEXT: v_writelane_b32 v23, s39, 6 +; GFX7-NEXT: v_writelane_b32 v23, s48, 7 +; GFX7-NEXT: v_writelane_b32 v23, s49, 8 +; GFX7-NEXT: v_writelane_b32 v23, s50, 9 +; GFX7-NEXT: v_writelane_b32 v23, s51, 10 +; GFX7-NEXT: v_writelane_b32 v23, s52, 11 +; GFX7-NEXT: v_writelane_b32 v23, s53, 12 +; GFX7-NEXT: v_writelane_b32 v23, s54, 13 +; GFX7-NEXT: v_writelane_b32 v23, s55, 14 +; GFX7-NEXT: v_writelane_b32 v23, s30, 15 +; GFX7-NEXT: v_writelane_b32 v23, s31, 16 ; GFX7-NEXT: s_lshr_b32 s5, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: v_lshr_b32_e64 v0, 
s32, 6 ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -1169,23 +1170,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s55, v23, 16 -; GFX7-NEXT: v_readlane_b32 s54, v23, 15 -; GFX7-NEXT: v_readlane_b32 s53, v23, 14 -; GFX7-NEXT: v_readlane_b32 s52, v23, 13 -; GFX7-NEXT: v_readlane_b32 s51, v23, 12 -; GFX7-NEXT: v_readlane_b32 s50, v23, 11 -; GFX7-NEXT: v_readlane_b32 s49, v23, 10 -; GFX7-NEXT: v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 -; GFX7-NEXT: v_readlane_b32 s37, v23, 6 -; GFX7-NEXT: v_readlane_b32 s36, v23, 5 -; GFX7-NEXT: v_readlane_b32 s35, v23, 4 -; GFX7-NEXT: v_readlane_b32 s34, v23, 3 -; GFX7-NEXT: v_readlane_b32 s33, v23, 2 -; GFX7-NEXT: v_readlane_b32 s31, v23, 1 -; GFX7-NEXT: v_readlane_b32 s30, v23, 0 +; GFX7-NEXT: v_readlane_b32 s30, v23, 15 +; GFX7-NEXT: v_readlane_b32 s31, v23, 16 +; GFX7-NEXT: v_readlane_b32 s55, v23, 14 +; GFX7-NEXT: v_readlane_b32 s54, v23, 13 +; GFX7-NEXT: v_readlane_b32 s53, v23, 12 +; GFX7-NEXT: v_readlane_b32 s52, v23, 11 +; GFX7-NEXT: v_readlane_b32 s51, v23, 10 +; GFX7-NEXT: v_readlane_b32 s50, v23, 9 +; GFX7-NEXT: v_readlane_b32 s49, v23, 8 +; GFX7-NEXT: v_readlane_b32 s48, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 6 +; GFX7-NEXT: v_readlane_b32 s38, v23, 5 +; GFX7-NEXT: v_readlane_b32 s37, v23, 4 +; GFX7-NEXT: v_readlane_b32 s36, v23, 3 +; GFX7-NEXT: v_readlane_b32 s35, v23, 2 +; GFX7-NEXT: 
v_readlane_b32 s34, v23, 1 +; GFX7-NEXT: v_readlane_b32 s33, v23, 0 ; GFX7-NEXT: v_readlane_b32 s28, v23, 17 ; GFX7-NEXT: v_readlane_b32 s29, v23, 18 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -1206,30 +1207,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v23, s30, 0 -; GFX8-NEXT: v_writelane_b32 v23, s31, 1 -; GFX8-NEXT: v_writelane_b32 v23, s33, 2 -; GFX8-NEXT: v_writelane_b32 v23, s34, 3 -; GFX8-NEXT: v_writelane_b32 v23, s35, 4 -; GFX8-NEXT: v_writelane_b32 v23, s36, 5 -; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s38, 7 -; GFX8-NEXT: v_writelane_b32 v23, s39, 8 -; GFX8-NEXT: v_writelane_b32 v23, s48, 9 -; GFX8-NEXT: v_writelane_b32 v23, s49, 10 -; GFX8-NEXT: v_writelane_b32 v23, s50, 11 -; GFX8-NEXT: v_writelane_b32 v23, s51, 12 -; GFX8-NEXT: v_writelane_b32 v23, s52, 13 +; GFX8-NEXT: v_writelane_b32 v23, s33, 0 +; GFX8-NEXT: v_writelane_b32 v23, s34, 1 +; GFX8-NEXT: v_writelane_b32 v23, s35, 2 +; GFX8-NEXT: v_writelane_b32 v23, s36, 3 +; GFX8-NEXT: v_writelane_b32 v23, s37, 4 +; GFX8-NEXT: v_writelane_b32 v23, s38, 5 +; GFX8-NEXT: v_writelane_b32 v23, s39, 6 +; GFX8-NEXT: v_writelane_b32 v23, s48, 7 +; GFX8-NEXT: v_writelane_b32 v23, s49, 8 +; GFX8-NEXT: v_writelane_b32 v23, s50, 9 +; GFX8-NEXT: v_writelane_b32 v23, s51, 10 +; GFX8-NEXT: v_writelane_b32 v23, s52, 11 +; GFX8-NEXT: v_writelane_b32 v23, s53, 12 +; GFX8-NEXT: v_writelane_b32 v23, s54, 13 +; GFX8-NEXT: v_writelane_b32 v23, s55, 14 +; GFX8-NEXT: v_writelane_b32 v23, s30, 15 +; GFX8-NEXT: v_writelane_b32 v23, s31, 16 ; GFX8-NEXT: s_lshr_b32 s5, s32, 6 -; GFX8-NEXT: v_writelane_b32 v23, s53, 14 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX8-NEXT: 
v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: v_writelane_b32 v22, s4, 0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -1241,23 +1242,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v23, 16 -; GFX8-NEXT: v_readlane_b32 s54, v23, 15 -; GFX8-NEXT: v_readlane_b32 s53, v23, 14 -; GFX8-NEXT: v_readlane_b32 s52, v23, 13 -; GFX8-NEXT: v_readlane_b32 s51, v23, 12 -; GFX8-NEXT: v_readlane_b32 s50, v23, 11 -; GFX8-NEXT: v_readlane_b32 s49, v23, 10 -; GFX8-NEXT: v_readlane_b32 s48, v23, 9 -; GFX8-NEXT: v_readlane_b32 s39, v23, 8 -; GFX8-NEXT: v_readlane_b32 s38, v23, 7 -; GFX8-NEXT: v_readlane_b32 s37, v23, 6 -; GFX8-NEXT: v_readlane_b32 s36, v23, 5 -; GFX8-NEXT: v_readlane_b32 s35, v23, 4 -; GFX8-NEXT: v_readlane_b32 s34, v23, 3 -; GFX8-NEXT: v_readlane_b32 s33, v23, 2 -; GFX8-NEXT: v_readlane_b32 s31, v23, 1 -; GFX8-NEXT: v_readlane_b32 s30, v23, 0 +; GFX8-NEXT: v_readlane_b32 s30, v23, 15 +; GFX8-NEXT: v_readlane_b32 s31, v23, 16 +; GFX8-NEXT: v_readlane_b32 s55, v23, 14 +; GFX8-NEXT: v_readlane_b32 s54, v23, 13 +; GFX8-NEXT: v_readlane_b32 s53, v23, 12 +; GFX8-NEXT: v_readlane_b32 s52, v23, 11 +; GFX8-NEXT: v_readlane_b32 s51, v23, 10 +; GFX8-NEXT: v_readlane_b32 s50, v23, 9 +; GFX8-NEXT: v_readlane_b32 s49, v23, 8 +; GFX8-NEXT: v_readlane_b32 s48, v23, 7 +; GFX8-NEXT: v_readlane_b32 s39, v23, 6 +; GFX8-NEXT: v_readlane_b32 s38, v23, 5 +; GFX8-NEXT: v_readlane_b32 s37, v23, 4 +; GFX8-NEXT: v_readlane_b32 s36, v23, 3 +; GFX8-NEXT: v_readlane_b32 s35, v23, 2 +; GFX8-NEXT: v_readlane_b32 s34, v23, 1 +; GFX8-NEXT: v_readlane_b32 s33, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 
s6, s32, 0x201000 ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1276,30 +1277,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v23, s30, 0 -; GFX900-NEXT: v_writelane_b32 v23, s31, 1 -; GFX900-NEXT: v_writelane_b32 v23, s33, 2 -; GFX900-NEXT: v_writelane_b32 v23, s34, 3 -; GFX900-NEXT: v_writelane_b32 v23, s35, 4 -; GFX900-NEXT: v_writelane_b32 v23, s36, 5 -; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s38, 7 -; GFX900-NEXT: v_writelane_b32 v23, s39, 8 -; GFX900-NEXT: v_writelane_b32 v23, s48, 9 -; GFX900-NEXT: v_writelane_b32 v23, s49, 10 -; GFX900-NEXT: v_writelane_b32 v23, s50, 11 -; GFX900-NEXT: v_writelane_b32 v23, s51, 12 -; GFX900-NEXT: v_writelane_b32 v23, s52, 13 +; GFX900-NEXT: v_writelane_b32 v23, s33, 0 +; GFX900-NEXT: v_writelane_b32 v23, s34, 1 +; GFX900-NEXT: v_writelane_b32 v23, s35, 2 +; GFX900-NEXT: v_writelane_b32 v23, s36, 3 +; GFX900-NEXT: v_writelane_b32 v23, s37, 4 +; GFX900-NEXT: v_writelane_b32 v23, s38, 5 +; GFX900-NEXT: v_writelane_b32 v23, s39, 6 +; GFX900-NEXT: v_writelane_b32 v23, s48, 7 +; GFX900-NEXT: v_writelane_b32 v23, s49, 8 +; GFX900-NEXT: v_writelane_b32 v23, s50, 9 +; GFX900-NEXT: v_writelane_b32 v23, s51, 10 +; GFX900-NEXT: v_writelane_b32 v23, s52, 11 +; GFX900-NEXT: v_writelane_b32 v23, s53, 12 +; GFX900-NEXT: v_writelane_b32 v23, s54, 13 +; GFX900-NEXT: v_writelane_b32 v23, s55, 14 +; GFX900-NEXT: v_writelane_b32 v23, s30, 15 +; GFX900-NEXT: v_writelane_b32 v23, s31, 16 ; GFX900-NEXT: s_lshr_b32 s5, s32, 6 -; GFX900-NEXT: v_writelane_b32 v23, s53, 14 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX900-NEXT: v_writelane_b32 v23, 
s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: v_writelane_b32 v22, s4, 0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -1311,23 +1312,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v23, 16 -; GFX900-NEXT: v_readlane_b32 s54, v23, 15 -; GFX900-NEXT: v_readlane_b32 s53, v23, 14 -; GFX900-NEXT: v_readlane_b32 s52, v23, 13 -; GFX900-NEXT: v_readlane_b32 s51, v23, 12 -; GFX900-NEXT: v_readlane_b32 s50, v23, 11 -; GFX900-NEXT: v_readlane_b32 s49, v23, 10 -; GFX900-NEXT: v_readlane_b32 s48, v23, 9 -; GFX900-NEXT: v_readlane_b32 s39, v23, 8 -; GFX900-NEXT: v_readlane_b32 s38, v23, 7 -; GFX900-NEXT: v_readlane_b32 s37, v23, 6 -; GFX900-NEXT: v_readlane_b32 s36, v23, 5 -; GFX900-NEXT: v_readlane_b32 s35, v23, 4 -; GFX900-NEXT: v_readlane_b32 s34, v23, 3 -; GFX900-NEXT: v_readlane_b32 s33, v23, 2 -; GFX900-NEXT: v_readlane_b32 s31, v23, 1 -; GFX900-NEXT: v_readlane_b32 s30, v23, 0 +; GFX900-NEXT: v_readlane_b32 s30, v23, 15 +; GFX900-NEXT: v_readlane_b32 s31, v23, 16 +; GFX900-NEXT: v_readlane_b32 s55, v23, 14 +; GFX900-NEXT: v_readlane_b32 s54, v23, 13 +; GFX900-NEXT: v_readlane_b32 s53, v23, 12 +; GFX900-NEXT: v_readlane_b32 s52, v23, 11 +; GFX900-NEXT: v_readlane_b32 s51, v23, 10 +; GFX900-NEXT: v_readlane_b32 s50, v23, 9 +; GFX900-NEXT: v_readlane_b32 s49, v23, 8 +; GFX900-NEXT: v_readlane_b32 s48, v23, 7 +; GFX900-NEXT: v_readlane_b32 s39, v23, 6 +; GFX900-NEXT: v_readlane_b32 s38, v23, 5 +; GFX900-NEXT: v_readlane_b32 s37, v23, 4 +; GFX900-NEXT: v_readlane_b32 s36, v23, 3 +; GFX900-NEXT: v_readlane_b32 s35, v23, 2 +; GFX900-NEXT: v_readlane_b32 s34, v23, 1 +; GFX900-NEXT: v_readlane_b32 s33, v23, 0 ; 
GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1344,25 +1345,26 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v22, s30, 0 -; GFX942-NEXT: v_writelane_b32 v22, s31, 1 -; GFX942-NEXT: v_writelane_b32 v22, s33, 2 -; GFX942-NEXT: v_writelane_b32 v22, s34, 3 -; GFX942-NEXT: v_writelane_b32 v22, s35, 4 -; GFX942-NEXT: v_writelane_b32 v22, s36, 5 -; GFX942-NEXT: v_writelane_b32 v22, s37, 6 -; GFX942-NEXT: v_writelane_b32 v22, s38, 7 -; GFX942-NEXT: v_writelane_b32 v22, s39, 8 -; GFX942-NEXT: v_writelane_b32 v22, s48, 9 -; GFX942-NEXT: v_writelane_b32 v22, s49, 10 -; GFX942-NEXT: v_writelane_b32 v22, s50, 11 -; GFX942-NEXT: v_writelane_b32 v22, s51, 12 -; GFX942-NEXT: v_writelane_b32 v22, s52, 13 -; GFX942-NEXT: v_writelane_b32 v22, s53, 14 +; GFX942-NEXT: v_writelane_b32 v22, s33, 0 +; GFX942-NEXT: v_writelane_b32 v22, s34, 1 +; GFX942-NEXT: v_writelane_b32 v22, s35, 2 +; GFX942-NEXT: v_writelane_b32 v22, s36, 3 +; GFX942-NEXT: v_writelane_b32 v22, s37, 4 +; GFX942-NEXT: v_writelane_b32 v22, s38, 5 +; GFX942-NEXT: v_writelane_b32 v22, s39, 6 +; GFX942-NEXT: v_writelane_b32 v22, s48, 7 +; GFX942-NEXT: v_writelane_b32 v22, s49, 8 +; GFX942-NEXT: v_writelane_b32 v22, s50, 9 +; GFX942-NEXT: v_writelane_b32 v22, s51, 10 +; GFX942-NEXT: v_writelane_b32 v22, s52, 11 +; GFX942-NEXT: v_writelane_b32 v22, s53, 12 +; GFX942-NEXT: v_writelane_b32 v22, s54, 13 +; GFX942-NEXT: v_writelane_b32 v22, s55, 14 +; GFX942-NEXT: v_writelane_b32 v22, s30, 15 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v22, s31, 16 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v22, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 -; GFX942-NEXT: 
v_writelane_b32 v22, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -1376,23 +1378,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s55, v22, 16 -; GFX942-NEXT: v_readlane_b32 s54, v22, 15 -; GFX942-NEXT: v_readlane_b32 s53, v22, 14 -; GFX942-NEXT: v_readlane_b32 s52, v22, 13 -; GFX942-NEXT: v_readlane_b32 s51, v22, 12 -; GFX942-NEXT: v_readlane_b32 s50, v22, 11 -; GFX942-NEXT: v_readlane_b32 s49, v22, 10 -; GFX942-NEXT: v_readlane_b32 s48, v22, 9 -; GFX942-NEXT: v_readlane_b32 s39, v22, 8 -; GFX942-NEXT: v_readlane_b32 s38, v22, 7 -; GFX942-NEXT: v_readlane_b32 s37, v22, 6 -; GFX942-NEXT: v_readlane_b32 s36, v22, 5 -; GFX942-NEXT: v_readlane_b32 s35, v22, 4 -; GFX942-NEXT: v_readlane_b32 s34, v22, 3 -; GFX942-NEXT: v_readlane_b32 s33, v22, 2 -; GFX942-NEXT: v_readlane_b32 s31, v22, 1 -; GFX942-NEXT: v_readlane_b32 s30, v22, 0 +; GFX942-NEXT: v_readlane_b32 s30, v22, 15 +; GFX942-NEXT: v_readlane_b32 s31, v22, 16 +; GFX942-NEXT: v_readlane_b32 s55, v22, 14 +; GFX942-NEXT: v_readlane_b32 s54, v22, 13 +; GFX942-NEXT: v_readlane_b32 s53, v22, 12 +; GFX942-NEXT: v_readlane_b32 s52, v22, 11 +; GFX942-NEXT: v_readlane_b32 s51, v22, 10 +; GFX942-NEXT: v_readlane_b32 s50, v22, 9 +; GFX942-NEXT: v_readlane_b32 s49, v22, 8 +; GFX942-NEXT: v_readlane_b32 s48, v22, 7 +; GFX942-NEXT: v_readlane_b32 s39, v22, 6 +; GFX942-NEXT: v_readlane_b32 s38, v22, 5 +; GFX942-NEXT: v_readlane_b32 s37, v22, 4 +; GFX942-NEXT: v_readlane_b32 s36, v22, 3 +; GFX942-NEXT: v_readlane_b32 s35, v22, 2 +; GFX942-NEXT: v_readlane_b32 s34, v22, 1 +; GFX942-NEXT: v_readlane_b32 s33, v22, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 ; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded 
Reload @@ -1408,31 +1410,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 +; GFX10_1-NEXT: v_writelane_b32 v22, s33, 0 +; GFX10_1-NEXT: v_writelane_b32 v22, s34, 1 +; GFX10_1-NEXT: v_writelane_b32 v22, s35, 2 +; GFX10_1-NEXT: v_writelane_b32 v22, s36, 3 +; GFX10_1-NEXT: v_writelane_b32 v22, s37, 4 +; GFX10_1-NEXT: v_writelane_b32 v22, s38, 5 +; GFX10_1-NEXT: v_writelane_b32 v22, s39, 6 +; GFX10_1-NEXT: v_writelane_b32 v22, s48, 7 +; GFX10_1-NEXT: v_writelane_b32 v22, s49, 8 +; GFX10_1-NEXT: v_writelane_b32 v22, s50, 9 +; GFX10_1-NEXT: v_writelane_b32 v22, s51, 10 +; GFX10_1-NEXT: v_writelane_b32 v22, s52, 11 +; GFX10_1-NEXT: v_writelane_b32 v22, s53, 12 +; GFX10_1-NEXT: v_writelane_b32 v22, s54, 13 +; GFX10_1-NEXT: v_writelane_b32 v22, s55, 14 +; GFX10_1-NEXT: v_writelane_b32 v22, s30, 15 +; GFX10_1-NEXT: v_writelane_b32 v22, s31, 16 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240 -; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 -; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2 -; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3 -; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 -; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 -; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 -; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 -; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 -; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 -; GFX10_1-NEXT: 
v_writelane_b32 v22, s52, 13 -; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 -; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15 -; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -1441,23 +1443,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 -; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 -; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 -; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 -; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 -; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 -; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 -; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 -; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 -; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 -; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 -; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3 -; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2 -; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1 -; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0 +; GFX10_1-NEXT: v_readlane_b32 s30, v22, 15 +; GFX10_1-NEXT: v_readlane_b32 s31, v22, 16 +; GFX10_1-NEXT: v_readlane_b32 s55, v22, 14 +; GFX10_1-NEXT: v_readlane_b32 s54, v22, 13 +; GFX10_1-NEXT: v_readlane_b32 s53, v22, 12 +; GFX10_1-NEXT: v_readlane_b32 s52, v22, 11 +; GFX10_1-NEXT: v_readlane_b32 s51, v22, 10 +; GFX10_1-NEXT: v_readlane_b32 s50, v22, 9 +; GFX10_1-NEXT: v_readlane_b32 s49, v22, 8 +; GFX10_1-NEXT: v_readlane_b32 s48, v22, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v22, 6 +; GFX10_1-NEXT: v_readlane_b32 s38, v22, 5 +; GFX10_1-NEXT: v_readlane_b32 s37, v22, 4 +; GFX10_1-NEXT: v_readlane_b32 s36, v22, 3 +; GFX10_1-NEXT: v_readlane_b32 s35, v22, 2 
+; GFX10_1-NEXT: v_readlane_b32 s34, v22, 1 +; GFX10_1-NEXT: v_readlane_b32 s33, v22, 0 ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload @@ -1473,31 +1475,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 +; GFX10_3-NEXT: v_writelane_b32 v22, s33, 0 +; GFX10_3-NEXT: v_writelane_b32 v22, s34, 1 +; GFX10_3-NEXT: v_writelane_b32 v22, s35, 2 +; GFX10_3-NEXT: v_writelane_b32 v22, s36, 3 +; GFX10_3-NEXT: v_writelane_b32 v22, s37, 4 +; GFX10_3-NEXT: v_writelane_b32 v22, s38, 5 +; GFX10_3-NEXT: v_writelane_b32 v22, s39, 6 +; GFX10_3-NEXT: v_writelane_b32 v22, s48, 7 +; GFX10_3-NEXT: v_writelane_b32 v22, s49, 8 +; GFX10_3-NEXT: v_writelane_b32 v22, s50, 9 +; GFX10_3-NEXT: v_writelane_b32 v22, s51, 10 +; GFX10_3-NEXT: v_writelane_b32 v22, s52, 11 +; GFX10_3-NEXT: v_writelane_b32 v22, s53, 12 +; GFX10_3-NEXT: v_writelane_b32 v22, s54, 13 +; GFX10_3-NEXT: v_writelane_b32 v22, s55, 14 +; GFX10_3-NEXT: v_writelane_b32 v22, s30, 15 +; GFX10_3-NEXT: v_writelane_b32 v22, s31, 16 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240 -; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1 -; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2 -; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3 -; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 -; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 -; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 -; 
GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 -; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 -; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 -; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 -; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 -; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 -; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15 -; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -1506,23 +1508,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 -; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 -; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14 -; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 -; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 -; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 -; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 -; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 -; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 -; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 -; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 -; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3 -; GFX10_3-NEXT: v_readlane_b32 s33, v22, 2 -; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1 -; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0 +; GFX10_3-NEXT: v_readlane_b32 s30, v22, 15 +; GFX10_3-NEXT: v_readlane_b32 s31, v22, 16 +; GFX10_3-NEXT: v_readlane_b32 s55, v22, 14 +; GFX10_3-NEXT: v_readlane_b32 s54, v22, 13 +; GFX10_3-NEXT: v_readlane_b32 s53, v22, 12 +; GFX10_3-NEXT: v_readlane_b32 s52, v22, 11 +; GFX10_3-NEXT: v_readlane_b32 s51, v22, 10 +; GFX10_3-NEXT: v_readlane_b32 s50, v22, 9 +; GFX10_3-NEXT: v_readlane_b32 s49, v22, 8 +; GFX10_3-NEXT: 
v_readlane_b32 s48, v22, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v22, 6 +; GFX10_3-NEXT: v_readlane_b32 s38, v22, 5 +; GFX10_3-NEXT: v_readlane_b32 s37, v22, 4 +; GFX10_3-NEXT: v_readlane_b32 s36, v22, 3 +; GFX10_3-NEXT: v_readlane_b32 s35, v22, 2 +; GFX10_3-NEXT: v_readlane_b32 s34, v22, 1 +; GFX10_3-NEXT: v_readlane_b32 s33, v22, 0 ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 ; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload @@ -1537,30 +1539,30 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v22, s30, 0 +; GFX11-NEXT: v_writelane_b32 v22, s33, 0 +; GFX11-NEXT: v_writelane_b32 v22, s34, 1 +; GFX11-NEXT: v_writelane_b32 v22, s35, 2 +; GFX11-NEXT: v_writelane_b32 v22, s36, 3 +; GFX11-NEXT: v_writelane_b32 v22, s37, 4 +; GFX11-NEXT: v_writelane_b32 v22, s38, 5 +; GFX11-NEXT: v_writelane_b32 v22, s39, 6 +; GFX11-NEXT: v_writelane_b32 v22, s48, 7 +; GFX11-NEXT: v_writelane_b32 v22, s49, 8 +; GFX11-NEXT: v_writelane_b32 v22, s50, 9 +; GFX11-NEXT: v_writelane_b32 v22, s51, 10 +; GFX11-NEXT: v_writelane_b32 v22, s52, 11 +; GFX11-NEXT: v_writelane_b32 v22, s53, 12 +; GFX11-NEXT: v_writelane_b32 v22, s54, 13 +; GFX11-NEXT: v_writelane_b32 v22, s55, 14 +; GFX11-NEXT: v_writelane_b32 v22, s30, 15 +; GFX11-NEXT: v_writelane_b32 v22, s31, 16 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_add_i32 s58, s32, 0x4240 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v22, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v22, s33, 2 -; GFX11-NEXT: v_writelane_b32 v22, s34, 3 -; GFX11-NEXT: v_writelane_b32 v22, s35, 4 -; GFX11-NEXT: v_writelane_b32 v22, s36, 5 -; GFX11-NEXT: 
v_writelane_b32 v22, s37, 6 -; GFX11-NEXT: v_writelane_b32 v22, s38, 7 -; GFX11-NEXT: v_writelane_b32 v22, s39, 8 -; GFX11-NEXT: v_writelane_b32 v22, s48, 9 -; GFX11-NEXT: v_writelane_b32 v22, s49, 10 -; GFX11-NEXT: v_writelane_b32 v22, s50, 11 -; GFX11-NEXT: v_writelane_b32 v22, s51, 12 -; GFX11-NEXT: v_writelane_b32 v22, s52, 13 -; GFX11-NEXT: v_writelane_b32 v22, s53, 14 -; GFX11-NEXT: v_writelane_b32 v22, s54, 15 -; GFX11-NEXT: v_writelane_b32 v22, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND @@ -1569,24 +1571,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readlane_b32 s55, v22, 16 -; GFX11-NEXT: v_readlane_b32 s54, v22, 15 -; GFX11-NEXT: v_readlane_b32 s53, v22, 14 -; GFX11-NEXT: v_readlane_b32 s52, v22, 13 -; GFX11-NEXT: v_readlane_b32 s51, v22, 12 -; GFX11-NEXT: v_readlane_b32 s50, v22, 11 -; GFX11-NEXT: v_readlane_b32 s49, v22, 10 -; GFX11-NEXT: v_readlane_b32 s48, v22, 9 -; GFX11-NEXT: v_readlane_b32 s39, v22, 8 -; GFX11-NEXT: v_readlane_b32 s38, v22, 7 -; GFX11-NEXT: v_readlane_b32 s37, v22, 6 -; GFX11-NEXT: v_readlane_b32 s36, v22, 5 -; GFX11-NEXT: v_readlane_b32 s35, v22, 4 -; GFX11-NEXT: v_readlane_b32 s34, v22, 3 -; GFX11-NEXT: v_readlane_b32 s33, v22, 2 -; GFX11-NEXT: v_readlane_b32 s31, v22, 1 -; GFX11-NEXT: v_readlane_b32 s30, v22, 0 +; GFX11-NEXT: v_readlane_b32 s30, v22, 15 +; GFX11-NEXT: v_readlane_b32 s31, v22, 16 +; GFX11-NEXT: v_readlane_b32 s55, v22, 14 +; GFX11-NEXT: v_readlane_b32 s54, v22, 13 +; GFX11-NEXT: v_readlane_b32 s53, v22, 12 +; GFX11-NEXT: v_readlane_b32 s52, v22, 11 +; GFX11-NEXT: v_readlane_b32 s51, v22, 10 +; GFX11-NEXT: v_readlane_b32 s50, v22, 9 +; GFX11-NEXT: 
v_readlane_b32 s49, v22, 8 +; GFX11-NEXT: v_readlane_b32 s48, v22, 7 +; GFX11-NEXT: v_readlane_b32 s39, v22, 6 +; GFX11-NEXT: v_readlane_b32 s38, v22, 5 +; GFX11-NEXT: v_readlane_b32 s37, v22, 4 +; GFX11-NEXT: v_readlane_b32 s36, v22, 3 +; GFX11-NEXT: v_readlane_b32 s35, v22, 2 +; GFX11-NEXT: v_readlane_b32 s34, v22, 1 +; GFX11-NEXT: v_readlane_b32 s33, v22, 0 ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 ; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload @@ -1605,29 +1606,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v22, s30, 0 +; GFX12-NEXT: v_writelane_b32 v22, s33, 0 +; GFX12-NEXT: v_writelane_b32 v22, s34, 1 +; GFX12-NEXT: v_writelane_b32 v22, s35, 2 +; GFX12-NEXT: v_writelane_b32 v22, s36, 3 +; GFX12-NEXT: v_writelane_b32 v22, s37, 4 +; GFX12-NEXT: v_writelane_b32 v22, s38, 5 +; GFX12-NEXT: v_writelane_b32 v22, s39, 6 +; GFX12-NEXT: v_writelane_b32 v22, s48, 7 +; GFX12-NEXT: v_writelane_b32 v22, s49, 8 +; GFX12-NEXT: v_writelane_b32 v22, s50, 9 +; GFX12-NEXT: v_writelane_b32 v22, s51, 10 +; GFX12-NEXT: v_writelane_b32 v22, s52, 11 +; GFX12-NEXT: v_writelane_b32 v22, s53, 12 +; GFX12-NEXT: v_writelane_b32 v22, s54, 13 +; GFX12-NEXT: v_writelane_b32 v22, s55, 14 +; GFX12-NEXT: v_writelane_b32 v22, s30, 15 +; GFX12-NEXT: v_writelane_b32 v22, s31, 16 ; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v22, s31, 1 -; GFX12-NEXT: v_writelane_b32 v22, s33, 2 -; GFX12-NEXT: v_writelane_b32 v22, s34, 3 -; GFX12-NEXT: v_writelane_b32 v22, s35, 4 -; GFX12-NEXT: v_writelane_b32 v22, s36, 5 -; GFX12-NEXT: v_writelane_b32 v22, 
s37, 6 -; GFX12-NEXT: v_writelane_b32 v22, s38, 7 -; GFX12-NEXT: v_writelane_b32 v22, s39, 8 -; GFX12-NEXT: v_writelane_b32 v22, s48, 9 -; GFX12-NEXT: v_writelane_b32 v22, s49, 10 -; GFX12-NEXT: v_writelane_b32 v22, s50, 11 -; GFX12-NEXT: v_writelane_b32 v22, s51, 12 -; GFX12-NEXT: v_writelane_b32 v22, s52, 13 -; GFX12-NEXT: v_writelane_b32 v22, s53, 14 -; GFX12-NEXT: v_writelane_b32 v22, s54, 15 -; GFX12-NEXT: v_writelane_b32 v22, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND @@ -1637,23 +1638,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s55, v22, 16 -; GFX12-NEXT: v_readlane_b32 s54, v22, 15 -; GFX12-NEXT: v_readlane_b32 s53, v22, 14 -; GFX12-NEXT: v_readlane_b32 s52, v22, 13 -; GFX12-NEXT: v_readlane_b32 s51, v22, 12 -; GFX12-NEXT: v_readlane_b32 s50, v22, 11 -; GFX12-NEXT: v_readlane_b32 s49, v22, 10 -; GFX12-NEXT: v_readlane_b32 s48, v22, 9 -; GFX12-NEXT: v_readlane_b32 s39, v22, 8 -; GFX12-NEXT: v_readlane_b32 s38, v22, 7 -; GFX12-NEXT: v_readlane_b32 s37, v22, 6 -; GFX12-NEXT: v_readlane_b32 s36, v22, 5 -; GFX12-NEXT: v_readlane_b32 s35, v22, 4 -; GFX12-NEXT: v_readlane_b32 s34, v22, 3 -; GFX12-NEXT: v_readlane_b32 s33, v22, 2 -; GFX12-NEXT: v_readlane_b32 s31, v22, 1 -; GFX12-NEXT: v_readlane_b32 s30, v22, 0 +; GFX12-NEXT: v_readlane_b32 s30, v22, 15 +; GFX12-NEXT: v_readlane_b32 s31, v22, 16 +; GFX12-NEXT: v_readlane_b32 s55, v22, 14 +; GFX12-NEXT: v_readlane_b32 s54, v22, 13 +; GFX12-NEXT: v_readlane_b32 s53, v22, 12 +; GFX12-NEXT: v_readlane_b32 s52, v22, 11 +; GFX12-NEXT: v_readlane_b32 s51, v22, 10 +; GFX12-NEXT: v_readlane_b32 s50, v22, 9 +; GFX12-NEXT: v_readlane_b32 s49, v22, 8 +; GFX12-NEXT: v_readlane_b32 s48, v22, 7 +; 
GFX12-NEXT: v_readlane_b32 s39, v22, 6 +; GFX12-NEXT: v_readlane_b32 s38, v22, 5 +; GFX12-NEXT: v_readlane_b32 s37, v22, 4 +; GFX12-NEXT: v_readlane_b32 s36, v22, 3 +; GFX12-NEXT: v_readlane_b32 s35, v22, 2 +; GFX12-NEXT: v_readlane_b32 s34, v22, 1 +; GFX12-NEXT: v_readlane_b32 s33, v22, 0 ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll index ca16e251d51cf..e84d3c913328c 100644 --- a/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll @@ -8941,6 +8941,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-LABEL: v_maximumnum_v32bf16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX8-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -8989,13 +8992,10 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX8-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX8-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX8-NEXT: s_waitcnt vmcnt(3) +; 
GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX8-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -9563,6 +9563,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-LABEL: v_maximumnum_v32bf16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX900-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9611,14 +9614,11 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX900-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX900-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX900-NEXT: s_mov_b32 s4, 0x5040100 -; GFX900-NEXT: s_waitcnt vmcnt(3) +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX900-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -10170,6 +10170,9 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-LABEL: v_maximumnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload 
Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v50, off, s32 ; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX950-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -10210,21 +10213,18 @@ define <32 x bfloat> @v_maximumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v19 ; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v34 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 +; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v31, v34, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, 0, v38 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 +; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 +; GFX950-NEXT: s_mov_b32 s0, 0x5040100 ; GFX950-NEXT: v_cndmask_b32_e32 v34, v35, v38, vcc ; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39 ; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 ; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v26 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc ; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v32, v32 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX950-NEXT: s_mov_b32 s0, 0x5040100 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_lshrrev_b32_e32 v35, 16, v50 ; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v50 diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index 02f39e25cb447..06213ef3e06ea 100644 --- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -81,7 +81,6 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) ; ALIGNED-LABEL: memcpy_p0_p0_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -90,6 +89,7 @@ define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) ; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB0_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 ; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 @@ -837,7 +837,6 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) ; ALIGNED-LABEL: memcpy_p1_p1_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -846,6 +845,7 @@ define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) ; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB1_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 ; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 @@ -2340,7 +2340,6 @@ define void 
@memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) ; ALIGNED-LABEL: memcpy_p5_p5_sz2048: ; ALIGNED: ; %bb.0: ; %entry ; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -2389,6 +2388,7 @@ define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) ; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 ; ALIGNED-NEXT: .LBB3_1: ; %load-store-loop ; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 ; ALIGNED-NEXT: s_clause 0x34 diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll index 416a601797617..8184e1927146d 100644 --- a/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll @@ -8980,6 +8980,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-LABEL: v_minimumnum_v32bf16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX8-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9029,13 +9032,10 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX8-NEXT: 
v_and_b32_e32 v52, 0xffff0000, v21 ; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX8-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; GFX8-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX8-NEXT: s_waitcnt vmcnt(3) +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX8-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -9603,6 +9603,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-LABEL: v_minimumnum_v32bf16: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: buffer_load_dword v55, off, s[0:3], s32 ; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX900-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -9652,13 +9655,10 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v21 ; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v20 ; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v19 -; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX900-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 ; 
GFX900-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 -; GFX900-NEXT: s_waitcnt vmcnt(3) +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v55 ; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v55 ; GFX900-NEXT: v_cndmask_b32_e32 v32, v33, v35, vcc @@ -10211,6 +10211,9 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-LABEL: v_minimumnum_v32bf16: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse ; GFX950-NEXT: scratch_load_dword v50, off, s32 ; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v14 ; GFX950-NEXT: v_lshrrev_b32_e32 v34, 16, v30 @@ -10252,20 +10255,17 @@ define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) { ; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v34 ; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v19 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v31, v34, vcc ; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v38 -; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 +; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX950-NEXT: v_cndmask_b32_e32 v34, v35, v38, vcc ; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39 ; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 ; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v26 ; GFX950-NEXT: v_cndmask_b32_e32 v31, v37, v31, vcc ; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v32, v32 -; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17 -; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse -; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_lshrrev_b32_e32 
v35, 16, v50 ; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v50 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 33cd598aae9b5..486a08d6ee8cd 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -194,19 +194,19 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: v_writelane_b32 v43, s4, 5 -; GFX9-NEXT: v_writelane_b32 v43, s30, 0 -; GFX9-NEXT: v_writelane_b32 v43, s31, 1 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v43, s34, 2 -; GFX9-NEXT: v_writelane_b32 v43, s36, 3 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v43, s34, 0 +; GFX9-NEXT: v_writelane_b32 v43, s36, 1 +; GFX9-NEXT: v_writelane_b32 v43, s37, 2 +; GFX9-NEXT: v_writelane_b32 v43, s30, 3 +; GFX9-NEXT: v_writelane_b32 v43, s31, 4 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v43, s37, 4 ; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v40, v1 ; GFX9-NEXT: v_mov_b32_e32 v41, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40 @@ -224,11 +224,11 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, 
s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s37, v43, 4 -; GFX9-NEXT: v_readlane_b32 s36, v43, 3 -; GFX9-NEXT: v_readlane_b32 s34, v43, 2 -; GFX9-NEXT: v_readlane_b32 s31, v43, 1 -; GFX9-NEXT: v_readlane_b32 s30, v43, 0 +; GFX9-NEXT: v_readlane_b32 s30, v43, 3 +; GFX9-NEXT: v_readlane_b32 s31, v43, 4 +; GFX9-NEXT: v_readlane_b32 s37, v43, 2 +; GFX9-NEXT: v_readlane_b32 s36, v43, 1 +; GFX9-NEXT: v_readlane_b32 s34, v43, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v43, 5 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 65446a036c91b..878302e4865bb 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -47,8 +47,8 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -190,8 +190,8 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -224,8 +224,8 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], 
s[22:23] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: v_readlane_b32 s30, v2, 0 +; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index ccaf0ac5377e4..da9463b1329c7 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -18,19 +18,19 @@ define void @test_func_call_external_void_func_i32_imm() #0 { ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: v_mov_b32_e32 v0, 42 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -52,24 +52,24 @@ define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: s_addk_i32 s32, 0x1400 ; GCN-NEXT: v_writelane_b32 v40, s16, 2 +; GCN-NEXT: 
v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x1400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, 42 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index e6243f0e41826..da30190663457 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -12,16 +12,228 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 2566 +; CHECK-NEXT: .cfi_undefined 2567 +; CHECK-NEXT: .cfi_undefined 2568 +; CHECK-NEXT: .cfi_undefined 2569 +; CHECK-NEXT: .cfi_undefined 2570 +; CHECK-NEXT: .cfi_undefined 2571 +; CHECK-NEXT: .cfi_undefined 
2572 +; CHECK-NEXT: .cfi_undefined 2573 +; CHECK-NEXT: .cfi_undefined 2574 +; CHECK-NEXT: .cfi_undefined 2575 +; CHECK-NEXT: .cfi_undefined 2576 +; CHECK-NEXT: .cfi_undefined 2577 +; CHECK-NEXT: .cfi_undefined 2578 +; CHECK-NEXT: .cfi_undefined 2579 +; CHECK-NEXT: .cfi_undefined 2580 +; CHECK-NEXT: .cfi_undefined 2581 +; CHECK-NEXT: .cfi_undefined 2582 +; CHECK-NEXT: .cfi_undefined 2583 +; CHECK-NEXT: .cfi_undefined 2584 +; CHECK-NEXT: .cfi_undefined 2585 +; CHECK-NEXT: .cfi_undefined 2586 +; CHECK-NEXT: .cfi_undefined 2587 +; CHECK-NEXT: .cfi_undefined 2588 +; CHECK-NEXT: .cfi_undefined 2589 +; CHECK-NEXT: .cfi_undefined 2590 +; CHECK-NEXT: .cfi_undefined 2591 +; CHECK-NEXT: .cfi_undefined 2592 +; CHECK-NEXT: .cfi_undefined 2593 +; CHECK-NEXT: .cfi_undefined 2594 +; CHECK-NEXT: .cfi_undefined 2595 +; CHECK-NEXT: .cfi_undefined 2596 +; CHECK-NEXT: .cfi_undefined 2597 +; CHECK-NEXT: .cfi_undefined 2598 +; CHECK-NEXT: .cfi_undefined 2599 +; CHECK-NEXT: .cfi_undefined 2608 +; CHECK-NEXT: .cfi_undefined 2609 +; CHECK-NEXT: .cfi_undefined 2610 +; CHECK-NEXT: .cfi_undefined 2611 +; CHECK-NEXT: .cfi_undefined 2612 +; CHECK-NEXT: .cfi_undefined 2613 +; CHECK-NEXT: .cfi_undefined 2614 +; CHECK-NEXT: .cfi_undefined 2615 +; CHECK-NEXT: .cfi_undefined 2624 +; CHECK-NEXT: .cfi_undefined 2625 +; CHECK-NEXT: .cfi_undefined 2626 +; CHECK-NEXT: .cfi_undefined 2627 +; CHECK-NEXT: .cfi_undefined 2628 +; CHECK-NEXT: .cfi_undefined 2629 +; CHECK-NEXT: .cfi_undefined 2630 +; CHECK-NEXT: .cfi_undefined 2631 +; CHECK-NEXT: .cfi_undefined 2640 +; CHECK-NEXT: .cfi_undefined 2641 +; CHECK-NEXT: .cfi_undefined 2642 +; CHECK-NEXT: .cfi_undefined 2643 +; CHECK-NEXT: .cfi_undefined 2644 +; CHECK-NEXT: .cfi_undefined 2645 +; CHECK-NEXT: .cfi_undefined 2646 +; CHECK-NEXT: .cfi_undefined 2647 +; CHECK-NEXT: .cfi_undefined 2656 +; CHECK-NEXT: .cfi_undefined 2657 +; CHECK-NEXT: .cfi_undefined 2658 +; CHECK-NEXT: .cfi_undefined 2659 +; CHECK-NEXT: .cfi_undefined 2660 +; CHECK-NEXT: .cfi_undefined 2661 
+; CHECK-NEXT: .cfi_undefined 2662 +; CHECK-NEXT: .cfi_undefined 2663 +; CHECK-NEXT: .cfi_undefined 2672 +; CHECK-NEXT: .cfi_undefined 2673 +; CHECK-NEXT: .cfi_undefined 2674 +; CHECK-NEXT: .cfi_undefined 2675 +; CHECK-NEXT: .cfi_undefined 2676 +; CHECK-NEXT: .cfi_undefined 2677 +; CHECK-NEXT: .cfi_undefined 2678 +; CHECK-NEXT: .cfi_undefined 2679 +; CHECK-NEXT: .cfi_undefined 2688 +; CHECK-NEXT: .cfi_undefined 2689 +; CHECK-NEXT: .cfi_undefined 2690 +; CHECK-NEXT: .cfi_undefined 2691 +; CHECK-NEXT: .cfi_undefined 2692 +; CHECK-NEXT: .cfi_undefined 2693 +; CHECK-NEXT: .cfi_undefined 2694 +; CHECK-NEXT: .cfi_undefined 2695 +; CHECK-NEXT: .cfi_undefined 2704 +; CHECK-NEXT: .cfi_undefined 2705 +; CHECK-NEXT: .cfi_undefined 2706 +; CHECK-NEXT: .cfi_undefined 2707 +; CHECK-NEXT: .cfi_undefined 2708 +; CHECK-NEXT: .cfi_undefined 2709 +; CHECK-NEXT: .cfi_undefined 2710 +; CHECK-NEXT: .cfi_undefined 2711 +; CHECK-NEXT: .cfi_undefined 2720 +; CHECK-NEXT: .cfi_undefined 2721 +; CHECK-NEXT: .cfi_undefined 2722 +; CHECK-NEXT: .cfi_undefined 2723 +; CHECK-NEXT: .cfi_undefined 2724 +; CHECK-NEXT: .cfi_undefined 2725 +; CHECK-NEXT: .cfi_undefined 2726 +; CHECK-NEXT: .cfi_undefined 2727 +; CHECK-NEXT: .cfi_undefined 2736 +; CHECK-NEXT: .cfi_undefined 2737 +; CHECK-NEXT: .cfi_undefined 2738 +; CHECK-NEXT: .cfi_undefined 2739 +; CHECK-NEXT: .cfi_undefined 2740 +; CHECK-NEXT: .cfi_undefined 2741 +; CHECK-NEXT: .cfi_undefined 2742 +; CHECK-NEXT: .cfi_undefined 2743 +; CHECK-NEXT: .cfi_undefined 2752 +; CHECK-NEXT: .cfi_undefined 2753 +; CHECK-NEXT: .cfi_undefined 2754 +; CHECK-NEXT: .cfi_undefined 2755 +; CHECK-NEXT: .cfi_undefined 2756 +; CHECK-NEXT: .cfi_undefined 2757 +; CHECK-NEXT: .cfi_undefined 2758 +; CHECK-NEXT: .cfi_undefined 2759 +; CHECK-NEXT: .cfi_undefined 2768 +; CHECK-NEXT: .cfi_undefined 2769 +; CHECK-NEXT: .cfi_undefined 2770 +; CHECK-NEXT: .cfi_undefined 2771 +; CHECK-NEXT: .cfi_undefined 2772 +; CHECK-NEXT: .cfi_undefined 2773 +; CHECK-NEXT: .cfi_undefined 2774 +; 
CHECK-NEXT: .cfi_undefined 2775 +; CHECK-NEXT: .cfi_undefined 2784 +; CHECK-NEXT: .cfi_undefined 2785 +; CHECK-NEXT: .cfi_undefined 2786 +; CHECK-NEXT: .cfi_undefined 2787 +; CHECK-NEXT: .cfi_undefined 2788 +; CHECK-NEXT: .cfi_undefined 2789 +; CHECK-NEXT: .cfi_undefined 2790 +; CHECK-NEXT: .cfi_undefined 2791 +; CHECK-NEXT: .cfi_undefined 2800 +; CHECK-NEXT: .cfi_undefined 2801 +; CHECK-NEXT: .cfi_undefined 2802 +; CHECK-NEXT: .cfi_undefined 2803 +; CHECK-NEXT: .cfi_undefined 2804 +; CHECK-NEXT: .cfi_undefined 2805 +; CHECK-NEXT: .cfi_undefined 2806 +; CHECK-NEXT: .cfi_undefined 2807 +; CHECK-NEXT: .cfi_undefined 32 +; CHECK-NEXT: .cfi_undefined 33 +; CHECK-NEXT: .cfi_undefined 34 +; CHECK-NEXT: .cfi_undefined 35 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 +; CHECK-NEXT: .cfi_undefined 40 +; CHECK-NEXT: .cfi_undefined 41 +; CHECK-NEXT: .cfi_undefined 42 +; CHECK-NEXT: .cfi_undefined 43 +; CHECK-NEXT: .cfi_undefined 44 +; CHECK-NEXT: .cfi_undefined 45 +; CHECK-NEXT: .cfi_undefined 46 +; CHECK-NEXT: .cfi_undefined 47 +; CHECK-NEXT: .cfi_undefined 48 +; CHECK-NEXT: .cfi_undefined 49 +; CHECK-NEXT: .cfi_undefined 50 +; CHECK-NEXT: .cfi_undefined 51 +; CHECK-NEXT: .cfi_undefined 52 +; CHECK-NEXT: .cfi_undefined 53 +; CHECK-NEXT: .cfi_undefined 54 +; CHECK-NEXT: .cfi_undefined 55 +; CHECK-NEXT: .cfi_undefined 56 +; CHECK-NEXT: .cfi_undefined 57 +; CHECK-NEXT: .cfi_undefined 58 +; CHECK-NEXT: .cfi_undefined 59 +; CHECK-NEXT: .cfi_undefined 60 +; CHECK-NEXT: .cfi_undefined 61 +; CHECK-NEXT: .cfi_undefined 72 +; CHECK-NEXT: .cfi_undefined 73 +; CHECK-NEXT: .cfi_undefined 74 +; CHECK-NEXT: .cfi_undefined 75 +; CHECK-NEXT: .cfi_undefined 76 +; CHECK-NEXT: .cfi_undefined 77 +; CHECK-NEXT: .cfi_undefined 78 +; CHECK-NEXT: .cfi_undefined 79 +; CHECK-NEXT: .cfi_undefined 88 +; CHECK-NEXT: .cfi_undefined 89 +; CHECK-NEXT: .cfi_undefined 90 +; CHECK-NEXT: .cfi_undefined 91 +; CHECK-NEXT: 
.cfi_undefined 92 +; CHECK-NEXT: .cfi_undefined 93 +; CHECK-NEXT: .cfi_undefined 94 +; CHECK-NEXT: .cfi_undefined 95 +; CHECK-NEXT: .cfi_undefined 1096 +; CHECK-NEXT: .cfi_undefined 1097 +; CHECK-NEXT: .cfi_undefined 1098 +; CHECK-NEXT: .cfi_undefined 1099 +; CHECK-NEXT: .cfi_undefined 1100 +; CHECK-NEXT: .cfi_undefined 1101 +; CHECK-NEXT: .cfi_undefined 1102 +; CHECK-NEXT: .cfi_undefined 1103 +; CHECK-NEXT: .cfi_undefined 1112 +; CHECK-NEXT: .cfi_undefined 1113 +; CHECK-NEXT: .cfi_undefined 1114 +; CHECK-NEXT: .cfi_undefined 1115 +; CHECK-NEXT: .cfi_undefined 1116 +; CHECK-NEXT: .cfi_undefined 1117 +; CHECK-NEXT: .cfi_undefined 1118 +; CHECK-NEXT: .cfi_undefined 1119 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s16, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset 2600, 0 ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 +; CHECK-NEXT: .cfi_llvm_vector_registers 65, 2600, 2, 32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: .cfi_llvm_vector_registers 16, 2623, 0, 32, 2623, 1, 32 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 ; CHECK-NEXT: s_getpc_b64 s[16:17] @@ -36,13 +248,14 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: 
.cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll index 627f4ada95dba..bac460949d579 100644 --- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -219,8 +219,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MUBUF-NEXT: s_mov_b32 s7, s33 -; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_mov_b32 s33, s32 +; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_addk_i32 s32, 0x400 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: s_cbranch_execz .LBB2_3 @@ -254,8 +254,8 @@ define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i3 ; FLATSCR: ; %bb.0: ; %entry ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; FLATSCR-NEXT: s_mov_b32 s3, s33 -; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_mov_b32 s33, s32 +; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz .LBB2_3 @@ -317,9 +317,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; MUBUF-NEXT: s_mov_b32 s7, s33 ; MUBUF-NEXT: s_add_i32 s33, s32, 0xfc0 ; MUBUF-NEXT: s_mov_b32 s8, s34 -; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_and_b32 s33, s33, 0xfffff000 ; MUBUF-NEXT: s_mov_b32 s34, s32 +; MUBUF-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; MUBUF-NEXT: s_addk_i32 s32, 0x2000 ; MUBUF-NEXT: s_and_saveexec_b64 s[4:5], vcc ; MUBUF-NEXT: s_cbranch_execz .LBB3_2 @@ -354,9 +354,9 @@ define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i ; FLATSCR-NEXT: s_mov_b32 s3, s33 ; FLATSCR-NEXT: s_add_i32 s33, s32, 63 ; FLATSCR-NEXT: s_mov_b32 s4, s34 -; 
FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_andn2_b32 s33, s33, 63 ; FLATSCR-NEXT: s_mov_b32 s34, s32 +; FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; FLATSCR-NEXT: s_addk_i32 s32, 0x80 ; FLATSCR-NEXT: s_and_saveexec_b64 s[0:1], vcc ; FLATSCR-NEXT: s_cbranch_execz .LBB3_2 diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index 2aae26b9470a8..34dd69f966637 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -33,6 +33,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX908: bb.0 (%ir-block.0): ; PEI-GFX908-NEXT: liveins: $agpr4, $sgpr4_sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9 ; PEI-GFX908-NEXT: {{ $}} + ; PEI-GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-GFX908-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3 ; PEI-GFX908-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; PEI-GFX908-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 @@ -79,6 +81,8 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; PEI-GFX90A: bb.0 (%ir-block.0): ; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; PEI-GFX90A-NEXT: {{ $}} + ; PEI-GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2228233 /* reguse:AGPR_32 */, undef renamable $agpr0 ; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3 ; PEI-GFX90A-NEXT: 
renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir index bb248fe0444db..0822067b6a12c 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -36,7 +36,116 @@ body: | ; GCN-LABEL: name: preserve_active_lanes_above_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr10, 32, $exec_lo, 32, 0 ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc @@ -69,8 +178,125 @@ body: | ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, 
$sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr10, 0 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 @@ -112,6 +338,122 @@ body: | ; GCN-LABEL: name: dont_preserve_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc @@ -143,8 +485,125 @@ body: | ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 
+ ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr9, 0 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 @@ -186,6 +645,17 @@ body: | ; GCN-LABEL: name: dont_preserve_if_no_chain_calls ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 
= SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 @@ -222,6 +692,116 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 @@ -260,6 +840,114 @@ body: | ; GCN-LABEL: name: dont_preserve_sgpr ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET 
target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir index 4aea915936ffc..b4f4412373509 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir @@ -37,9 +37,127 @@ body: | ; GCN-LABEL: name: preserve_inactive_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr8, 0 ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION offset $vgpr9, 128 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) @@ -73,6 +191,18 @@ body: | ; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls ; GCN: liveins: $sgpr35, $vgpr8 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 @@ -106,6 +236,114 @@ body: | ; GCN-LABEL: name: dont_preserve_wwm_if_init_whole_wave ; GCN: liveins: $sgpr0, $sgpr35 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1 @@ -131,6 +369,116 @@ body: | ; GCN-LABEL: name: dont_preserve_non_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr16 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $vgpr16 = V_MOV_B32_e32 16, implicit $exec ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 8, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc @@ -162,6 +510,118 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 @@ -200,6 +660,114 @@ body: | ; GCN-LABEL: name: dont_preserve_sgpr ; GCN: liveins: $sgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr11 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr30 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr64 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr65 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr66 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr67 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr68 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr69 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr70 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr71 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr80 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr81 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr82 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr83 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr84 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr85 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr86 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr87 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr96 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr97 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr98 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr99 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr100 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr101 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr102 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr103 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr104 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr105 ; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc ; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead 
$scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir index 4b4e9f1d81ec6..fa52c2f2bba71 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir @@ -20,7 +20,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v1 - ; MUBUF: $vgpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-NEXT: S_ENDPGM 0 @@ -28,13 +31,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v1 ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v1 - ; FLATSCR: $vgpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -42,13 +52,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v1 - ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -56,13 +73,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v1 - ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -70,6 +94,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec @@ -93,7 +121,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v2 - ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -103,6 +135,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -111,7 +149,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v2 - ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -119,6 +161,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -127,7 +175,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v2 - ; MUBUF-GFX90A: $vgpr0_vgpr1 = 
IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -137,6 +189,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -145,7 +203,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; 
FLATSCR-GFX90A-LABEL: name: test_spill_av_v2 - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -153,6 +215,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -178,7 +246,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v3 - 
; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -190,6 +263,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr1, implicit $exec @@ -200,7 +281,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v3 - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -208,6 +294,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $agpr1 = 
V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -218,7 +312,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v3 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -230,6 +329,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -240,7 +347,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v3 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -248,6 +360,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -275,7 +395,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v4 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -289,6 +415,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v4 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -301,7 +437,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v4 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: 
S_ENDPGM 0 @@ -309,6 +451,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -321,7 +473,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v4 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed 
$vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -335,6 +493,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -347,7 +515,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v4 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -355,6 +529,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 
= IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -384,7 +568,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v5 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -400,6 +591,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -414,7 +617,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v5 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit 
$flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -424,6 +634,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -438,7 +660,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v5 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -454,6 +683,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -468,7 +709,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v5 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -478,6 +726,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -509,7 +769,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v6 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -527,6 +795,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -543,7 +825,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v6 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, 
addrspace 5) @@ -553,6 +843,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -569,7 +873,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v6 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -587,6 +899,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -603,7 +929,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v6 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -613,6 +947,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit 
$exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -646,7 +994,16 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v7 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -666,6 +1023,22 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v7 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -684,7 +1057,16 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v7 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = 
IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -694,6 +1076,22 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -712,7 +1110,16 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v7 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -732,6 +1139,22 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v7 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -750,7 +1173,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v7 - ; FLATSCR-GFX90A: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -760,6 +1192,22 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v7 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 @@ -795,7 +1243,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v8 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -817,6 +1275,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -837,7 +1313,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v8 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -847,6 +1333,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -867,7 +1371,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v8 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store 
(s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -889,6 +1403,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 
implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -909,7 +1441,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v8 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) 
from %stack.0, align 4, addrspace 5) @@ -919,6 +1461,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: 
$agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -956,7 +1516,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) 
into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -994,6 +1572,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1030,7 +1642,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1044,6 +1674,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, 
$agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1080,7 +1744,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1118,6 +1800,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1154,7 +1870,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1168,6 +1902,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1221,7 +1989,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_v32 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1291,6 +2093,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1359,7 +2227,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_v32 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, 
addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1381,6 +2283,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1449,7 +2417,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_v32 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1519,6 +2521,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1587,7 +2655,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v32 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: 
SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1609,6 +2711,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1694,7 +2862,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a1 - ; MUBUF: $agpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: $agpr0 = IMPLICIT_DEF ; 
MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) @@ -1704,13 +2875,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a1 ; MUBUF-V2A: liveins: $vgpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a1 - ; FLATSCR: $agpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) @@ -1720,13 +2898,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-V2A: liveins: $vgpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a1 - ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -1734,13 +2919,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; 
MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a1 - ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1748,6 +2940,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec @@ -1771,7 +2967,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a2 - ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr1 + ; MUBUF-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1785,6 +2985,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a2 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1793,7 +2999,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a2 - ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1807,6 +3017,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1815,7 +3031,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a2 - ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -1825,6 +3045,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1833,7 +3059,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a2 - ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into 
%stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1841,6 +3071,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 @@ -1866,7 +3102,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a3 - ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1884,6 +3125,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a3 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1894,7 +3143,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a3 - ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1912,6 +3166,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1922,7 +3184,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a3 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1934,6 +3201,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1944,7 +3219,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a3 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1952,6 +3232,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 @@ -1979,7 +3267,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a4 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2001,6 +3295,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a4 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ 
-2013,7 +3317,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a4 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2035,6 +3345,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr3 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2047,7 +3367,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a4 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2061,6 +3387,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2073,7 +3409,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a4 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 
@@ -2081,6 +3423,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -2110,7 +3462,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a5 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + 
; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2136,6 +3495,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a5 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2150,7 +3521,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a5 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2176,6 +3554,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2190,7 +3580,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a5 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2206,6 +3603,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 
0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2220,7 +3629,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a5 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; 
FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2230,6 +3646,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = 
V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -2261,7 +3689,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a6 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2291,6 +3727,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a6 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2307,7 +3757,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a6 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2337,6 +3795,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2353,7 +3825,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a6 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: 
frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2371,6 +3851,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2387,7 +3881,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a6 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: 
(store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2397,6 +3899,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -2430,7 +3946,16 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a7 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2464,6 +3989,22 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a7 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2482,7 +4023,16 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a7 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + 
; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2516,6 +4066,22 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr6 = 
V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2534,7 +4100,16 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a7 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2554,6 +4129,22 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a7 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2572,7 +4163,16 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a7 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2582,6 +4182,22 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a7 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 @@ -2617,7 +4233,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a8 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2655,6 +4281,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a8 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2675,7 +4319,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a8 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2713,6 +4367,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-V2A: liveins: 
$vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2733,7 +4405,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a8 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2755,6 +4437,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2775,7 +4475,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a8 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2785,6 +4495,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -2822,7 +4550,18 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a9 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2864,6 +4603,26 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a9 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2886,7 +4645,18 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a9 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2928,6 +4698,26 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a9 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; FLATSCR-V2A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2950,7 +4740,18 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a9 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2974,6 +4775,26 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a9 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, 
implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2996,7 +4817,18 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a9 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit 
$flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5) @@ -3008,6 +4840,26 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a9 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; FLATSCR-GFX90A-V2A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 @@ -3047,7 +4899,19 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a10 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = 
V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3093,6 +4957,28 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a10 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3117,7 +5003,19 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a10 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3163,6 +5061,28 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a10 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; FLATSCR-V2A-NEXT: {{ $}} 
+ ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3187,7 +5107,19 @@ body: 
| ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a10 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3213,6 +5145,28 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a10 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; 
MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; MUBUF-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3237,7 +5191,19 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a10 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 
32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s64) into %stack.0 + 32, align 4, addrspace 5) @@ -3249,6 +5215,28 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a10 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 @@ -3290,7 +5278,20 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a11 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3340,6 +5341,30 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a11 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3366,7 +5391,20 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a11 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3416,6 +5454,30 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a11 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3442,7 +5504,20 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a11 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3470,6 +5545,30 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a11 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; MUBUF-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3496,7 +5595,20 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a11 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s96) into %stack.0 + 32, align 4, addrspace 5) @@ -3508,6 +5620,30 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a11 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 @@ -3551,7 +5687,21 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a12 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3605,6 +5755,32 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a12 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, 
$vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, 
implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3633,7 +5809,21 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a12 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 
:: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3687,6 +5877,32 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a12 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3715,7 +5931,21 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a12 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; 
MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3745,6 +5975,32 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a12 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; MUBUF-GFX90A-V2A-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3773,7 +6029,21 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a12 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -3785,6 +6055,32 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a12 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF ; 
FLATSCR-GFX90A-V2A-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 @@ -3830,7 +6126,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 
killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3900,6 +6214,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a16 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3936,7 +6284,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4006,6 +6372,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a16 ; 
FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4042,7 +6442,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -4080,6 +6498,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4116,7 +6568,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a16 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -4130,6 +6600,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -4183,7 +6687,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_av_a32 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4317,6 +6855,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_av_a32 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 
+ ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed 
$agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4385,7 +6989,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_av_a32 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, 
$sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4519,6 +7157,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4587,7 +7291,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_av_a32 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -4657,6 +7395,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -4725,7 +7529,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a32 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -4747,6 +7585,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir index 8eddc9a5afd50..603aa92f1b27a 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir @@ -59,6 +59,10 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v2_partial_agpr ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -69,6 +73,10 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) @@ -96,6 +104,11 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v3_partial_agpr ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; 
MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -108,6 +121,11 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -135,6 +153,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v4_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 
= IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -149,6 +173,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -180,6 +210,13 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v5_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 ; MUBUF-V2A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -196,6 +233,13 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -227,6 +271,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v6_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -245,6 +297,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -280,6 +340,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v8_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, 
$agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -302,6 +372,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -335,6 +415,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v16_partial_agpr ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -373,6 +471,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir index 2fbe08300af57..94518c6ae455f 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir @@ -20,7 +20,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v1 - ; MUBUF: $vgpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, 
addrspace 5) ; MUBUF-NEXT: S_ENDPGM 0 @@ -28,13 +31,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v1 ; MUBUF-V2A: liveins: $agpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v1 - ; FLATSCR: $vgpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -42,13 +52,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v1 ; FLATSCR-V2A: liveins: $agpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed 
$vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v1 - ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -56,13 +73,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1 ; MUBUF-GFX90A-V2A: liveins: $agpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v1 - ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
FLATSCR-GFX90A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -70,6 +94,10 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1 ; FLATSCR-GFX90A-V2A: liveins: $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec @@ -93,7 +121,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v2 - ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, 
addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -103,6 +135,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v2 ; MUBUF-V2A: liveins: $agpr0, $agpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -111,7 +149,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v2 - ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -119,6 +161,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v2 ; 
FLATSCR-V2A: liveins: $agpr0, $agpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -127,7 +175,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v2 - ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -137,6 +189,12 @@ body: | 
; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -145,7 +203,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v2 - ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -153,6 +215,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1 ; 
FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit killed $vgpr0_vgpr1 @@ -178,7 +246,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v3 - ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, 
addrspace 5) @@ -190,6 +263,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v3 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -200,7 +281,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v3 - ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -208,6 +294,14 @@ body: | ; 
FLATSCR-V2A-LABEL: name: test_spill_v3 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -218,7 +312,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v3 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec 
:: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -230,6 +329,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -240,7 +347,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v3 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; 
FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -248,6 +360,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 @@ -275,7 +395,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v4 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -289,6 +415,16 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v4 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -301,7 +437,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v4 - ; FLATSCR: 
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: S_ENDPGM 0 @@ -309,6 +451,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v4 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, 
implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -321,7 +473,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v4 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -335,6 +493,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -347,7 +515,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v4 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -355,6 +529,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3 ; 
FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -384,7 +568,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v5 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 
0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -400,6 +591,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v5 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -414,7 +617,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v5 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 
0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -424,6 +634,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v5 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -438,7 +660,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v5 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -454,6 
+683,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -468,7 +709,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v5 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -478,6 +726,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 @@ -509,7 +769,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v6 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -527,6 +795,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v6 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-V2A-NEXT: 
{{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -543,7 +825,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v6 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; 
FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -553,6 +843,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v6 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -569,7 +873,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v6 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -587,6 +899,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6 ; MUBUF-GFX90A-V2A: liveins: $agpr0, 
$agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -603,7 +929,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v6 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -613,6 +947,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 @@ -646,7 +994,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v8 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -668,6 +1026,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v8 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -688,7 +1064,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v8 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -698,6 +1084,24 @@ body: | ; 
FLATSCR-V2A-LABEL: name: test_spill_v8 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -718,7 +1122,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v8 - ; 
MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -740,6 +1154,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -760,7 +1192,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v8 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -770,6 +1212,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -807,7 +1267,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v16 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -845,6 +1323,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v16 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa 
$sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -881,7 +1393,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v16 - ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -895,6 +1425,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v16 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, 
implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -931,7 +1495,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v16 - ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -969,6 +1551,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1005,7 +1621,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v16 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed 
$vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1019,6 +1653,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 @@ -1072,7 +1740,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_v32 - ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1142,6 +1844,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_v32 ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ -1210,7 +1978,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_v32 - ; FLATSCR: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1232,6 +2034,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_v32 ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, 
$agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1300,7 +2168,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_v32 - ; MUBUF-GFX90A: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1370,6 +2272,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32 ; MUBUF-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, 
$agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec @@ 
-1438,7 +2406,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_v32 - ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + 
; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed 
$vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -1460,6 +2462,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32 ; FLATSCR-GFX90A-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 @@ -1545,7 +2613,10 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a1 - ; MUBUF: $agpr0 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) @@ -1555,13 +2626,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a1 ; MUBUF-V2A: liveins: $vgpr0 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-V2A-NEXT: 
$agpr0 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a1 - ; FLATSCR: $agpr0 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) @@ -1571,13 +2649,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a1 ; FLATSCR-V2A: liveins: $vgpr0 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a1 - ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: S_ENDPGM 0 @@ -1585,13 +2670,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1 ; MUBUF-GFX90A-V2A: liveins: $vgpr0 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a1 - ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1599,6 +2691,10 @@ body: | ; 
FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; FLATSCR-GFX90A-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec @@ -1622,7 +2718,11 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a2 - ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1636,6 +2736,12 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a2 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; 
MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1644,7 +2750,11 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a2 - ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -1658,6 +2768,12 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a2 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1666,7 +2782,11 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a2 - ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5) @@ -1676,6 +2796,12 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; MUBUF-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -1684,7 +2810,11 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a2 - ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1692,6 +2822,12 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 @@ -1717,7 +2853,12 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a3 - ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1735,6 +2876,14 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a3 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1745,7 +2894,12 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a3 - ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1763,6 +2917,14 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a3 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1773,7 +2935,12 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a3 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1785,6 +2952,14 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; MUBUF-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1795,7 +2970,12 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a3 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1803,6 +2983,14 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 
0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 @@ -1830,7 +3018,13 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a4 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1852,6 +3046,16 @@ body: | ; MUBUF-V2A-LABEL: 
name: test_spill_a4 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1864,7 +3068,13 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a4 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit 
$exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1886,6 +3096,16 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a4 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1898,7 +3118,13 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a4 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr3 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -1912,6 +3138,16 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; MUBUF-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1924,7 +3160,13 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; 
FLATSCR-GFX90A-LABEL: name: test_spill_a4 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0 @@ -1932,6 +3174,16 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -1961,7 +3213,14 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a5 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1987,6 +3246,18 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a5 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2001,7 +3272,14 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a5 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: 
$vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2027,6 +3305,18 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a5 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2041,7 +3331,14 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a5 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2057,6 +3354,18 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = 
V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; MUBUF-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2071,7 +3380,14 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a5 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2081,6 +3397,18 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -2112,7 +3440,15 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a6 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr5 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2142,6 +3478,20 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a6 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ 
-2158,7 +3508,15 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a6 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2188,6 +3546,20 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a6 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2204,7 +3576,15 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a6 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2222,6 +3602,20 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; MUBUF-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2238,7 +3632,15 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a6 - ; 
FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2248,6 +3650,20 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -2281,7 +3697,17 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a8 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2319,6 +3745,24 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a8 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2339,7 +3783,17 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a8 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2377,6 +3831,24 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a8 ; 
FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2397,7 +3869,17 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a8 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup 
CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2419,6 +3901,24 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; MUBUF-GFX90A-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2439,7 +3939,17 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a8 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5) @@ -2449,6 +3959,24 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -2486,7 +4014,25 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a16 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2556,6 +4102,40 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a16 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2592,7 +4172,25 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a16 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2662,6 +4260,40 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a16 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec 
@@ -2698,7 +4330,25 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a16 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -2736,6 +4386,40 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; MUBUF-GFX90A-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2772,7 +4456,25 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a16 - ; FLATSCR-GFX90A: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into 
%stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -2786,6 +4488,40 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -2839,7 +4575,41 @@ machineFunctionInfo: body: | bb.0.entry: ; MUBUF-LABEL: name: test_spill_a32 - ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -2973,6 +4743,72 @@ body: | ; MUBUF-V2A-LABEL: name: test_spill_a32 ; MUBUF-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-V2A-NEXT: {{ $}} + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-V2A-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr22 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3041,7 +4877,41 @@ body: | ; MUBUF-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: test_spill_a32 - ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr30 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3175,6 +5045,72 @@ body: | ; FLATSCR-V2A-LABEL: name: test_spill_a32 ; FLATSCR-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-V2A-NEXT: {{ $}} + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 
+ ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; 
FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3243,7 +5179,41 @@ body: | ; FLATSCR-V2A-NEXT: S_ENDPGM 0 ; ; MUBUF-GFX90A-LABEL: name: test_spill_a32 - ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; MUBUF-GFX90A: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr4 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; MUBUF-GFX90A-NEXT: 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -3313,6 +5283,72 @@ body: | ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32 ; MUBUF-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; MUBUF-GFX90A-V2A-NEXT: {{ $}} + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; 
MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr25 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; MUBUF-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; MUBUF-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; MUBUF-GFX90A-V2A-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; MUBUF-GFX90A-V2A-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -3381,7 +5417,41 @@ body: | ; MUBUF-GFX90A-V2A-NEXT: S_ENDPGM 0 ; ; FLATSCR-GFX90A-LABEL: name: test_spill_a32 - ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF + ; FLATSCR-GFX90A: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION 
undefined $agpr0 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; 
FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 + ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5) ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5) @@ -3403,6 +5473,72 @@ body: | ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32 ; FLATSCR-GFX90A-V2A: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; FLATSCR-GFX90A-V2A-NEXT: {{ $}} + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 
0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; 
FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; FLATSCR-GFX90A-V2A-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF ; FLATSCR-GFX90A-V2A-NEXT: $vgpr28 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; FLATSCR-GFX90A-V2A-NEXT: $vgpr29 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir index aa4428f3da4eb..8027373123d61 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -27,6 +27,8 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr4 = S_MOV_B32 524288 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 05cbd4c2a010d..71e7ca11a86cd 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -29,11 +29,43 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 
0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -45,6 +77,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def 
$sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -77,11 +110,42 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr ; CHECK: liveins: $sgpr29, $sgpr40, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr29 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr40 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -93,6 +157,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit 
$sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr40 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -125,11 +190,41 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 ; CHECK: liveins: $sgpr28, $sgpr29, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 ; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr28 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr29 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: 
$vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -141,6 +236,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -172,11 +268,41 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc ; CHECK: liveins: $sgpr28, $sgpr29, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 ; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr28 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr29 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, 
implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -188,6 +314,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28 ; CHECK-NEXT: S_ENDPGM 0 S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31 diff --git 
a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index 4f1c9a20fddc3..7c4e03fd0e6df 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -25,11 +25,43 @@ body: | ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs ; MUBUF: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 
+ ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -39,17 +71,50 @@ body: | ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit 
$sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc ; ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs ; FLATSCR: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + 
; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc @@ -58,6 +123,7 @@ body: | ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, 
$vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index 480859a09a347..cd335321e2156 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -24,11 +24,43 @@ body: | ; CHECK-LABEL: name: scavenge_sgpr_pei ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, 
implicit-def dead $scc ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec @@ -37,6 +69,7 @@ body: | ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, 
implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index 63a4759d8e740..fb3e8116d86a4 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -26,16 +26,85 @@ body: | ; GFX8-LABEL: name: pei_scavenge_vgpr_spill ; GFX8: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr11 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr62 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX8-NEXT: $sgpr4 = COPY $sgpr33 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX8-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX8-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192 @@ -51,22 +120,92 @@ body: | ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX8-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX8-NEXT: $sgpr33 = COPY $sgpr4 ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; ; GFX9-LABEL: name: pei_scavenge_vgpr_spill ; GFX9: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr13 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX9-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX9-NEXT: 
frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec @@ -80,22 +219,92 @@ body: | ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; GFX9-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + 
; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR 
$vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 1048832 ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr33, $vgpr2, 0, 32 ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr34, $vgpr2, 1, 32 ; GFX9-FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec @@ -108,6 +317,7 @@ body: | ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 + ; GFX9-FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; GFX9-FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir index bfca9331a5d25..ff8418e5b2f60 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn 
-mcpu=gfx1200 -mattr=+block-vgpr-csr,+wavefrontsize32,-wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W32 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,-wavefrontsize32,+wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W64 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,+wavefrontsize32,-wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=W32 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,-wavefrontsize32,+wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=W64 --- | define void @one_block() { ret void } @@ -23,15 +23,93 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: one_block - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: one_block + ; W32: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 9 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W32-NEXT: $m0 = S_MOV_B32 9 + ; W32-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit 
$exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: one_block + ; W64: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 9 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W64-NEXT: $m0 = S_MOV_B32 9 + ; W64-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit 
$vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 S_SETPC_B64_return $sgpr30_sgpr31 ... @@ -47,15 +125,93 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: one_block_csr_only - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = 
SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: one_block_csr_only + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 16711935 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 
0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec_lo, 32, 128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 160 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec_lo, 32, 192 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec_lo, 32, 224 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec_lo, 32, 512 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec_lo, 32, 544 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec_lo, 32, 576 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec_lo, 32, 608 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec_lo, 32, 640 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec_lo, 32, 672 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec_lo, 32, 704 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 32, $exec_lo, 32, 736 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, 
implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 + ; W32-NEXT: $m0 = S_MOV_B32 16711935 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: one_block_csr_only + ; W64: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 16711935 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 128 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr44, 32, $exec, 64, 256 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 320 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr46, 32, $exec, 64, 384 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr47, 32, $exec, 64, 448 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr56, 32, $exec, 64, 1024 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr57, 32, $exec, 64, 1088 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr58, 32, $exec, 64, 1152 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr59, 32, $exec, 64, 1216 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr60, 32, $exec, 64, 1280 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr61, 32, $exec, 64, 1344 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr62, 32, $exec, 64, 1408 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr63, 
32, $exec, 64, 1472 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 + ; W64-NEXT: $m0 = S_MOV_B32 16711935 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -75,23 +231,221 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: multiple_blocks - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = 
S_MOV_B32 3 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: multiple_blocks + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr241 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; 
W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 1024 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 1056 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: $m0 = S_MOV_B32 65 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, 
implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr104, 32, $exec_lo, 32, 128 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr105 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr106 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr107 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr108 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr109 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr110, 32, $exec_lo, 32, 320 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr111 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr120 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr121 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr122 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr123 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr124 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr125 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr126 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr127 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr232, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr233 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr234 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr235 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr236 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr237 + ; 
W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr238 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr239 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr248 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr249 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr250 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr251 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr252 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr253 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr254 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr255 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 65 + ; W32-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, 
implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: multiple_blocks + ; W64: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr112 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr113 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr114 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr115 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr116 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr117 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr118 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr119 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr128 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr129 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr130 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr131 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr132 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr133 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr134 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr135 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr240 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr241 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr242 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr243 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr244 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr245 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr246 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr247 + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 2048 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 2112 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: $m0 = S_MOV_B32 65 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr104, 32, $exec, 64, 256 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr105 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr106 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr107 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr108 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr109 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr110, 32, $exec, 64, 640 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr111 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr120 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr121 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr122 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr123 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr124 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr125 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr126 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr127 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr232, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION same_value $vgpr233 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr234 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr235 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr236 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr237 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr238 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr239 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr248 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr249 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr250 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr251 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr252 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr253 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr254 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr255 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 65 + ; W64-NEXT: 
$vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -109,19 +463,165 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: reg_tuples - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit 
$vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: reg_tuples + ; W32: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr83 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; W32-NEXT: $m0 = S_MOV_B32 7 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit 
$m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 256 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec_lo, 32, 288 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 320 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr72, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr73, 32, $exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr74 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr75 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr76 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr77 + ; W32-NEXT: frame-setup 
CFI_INSTRUCTION same_value $vgpr78 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr79 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr88 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr89 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr90 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr91 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr92 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr93 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr94 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr95 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: $m0 = S_MOV_B32 3 + ; W32-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W32-NEXT: $m0 = S_MOV_B32 7 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, 
addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: reg_tuples + ; W64: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr80 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr81 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr82 + ; W64-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr83 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr84 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr85 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr86 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr87 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr96 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr97 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr98 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr99 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr100 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr101 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr102 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr103 + ; W64-NEXT: $m0 = S_MOV_B32 7 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 512 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr41, 32, $exec, 64, 576 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 640 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: 
frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr72, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr73, 32, $exec, 64, 64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr74 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr75 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr76 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr77 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr78 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr79 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr88 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr89 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr90 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr91 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr92 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr93 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr94 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr95 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: $m0 = S_MOV_B32 3 + ; W64-NEXT: 
$vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; W64-NEXT: $m0 = S_MOV_B32 7 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -145,17 +645,97 @@ stack: body: | bb.0: liveins: $sgpr30_sgpr31, $vgpr48 - ; CHECK-LABEL: name: locals - ; CHECK: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40 - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: locals + ; W32: liveins: $vgpr48, $sgpr30_sgpr31, 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W32-NEXT: S_NOP 0, implicit-def $vgpr40 + ; W32-NEXT: $m0 = S_MOV_B32 1 + ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: locals + ; W64: liveins: $vgpr48, $sgpr30_sgpr31, 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W64-NEXT: 
frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr43 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W64-NEXT: S_NOP 0, implicit-def $vgpr40 + ; W64-NEXT: $m0 = S_MOV_B32 1 + ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: 
(store (s32) into %stack.0, addrspace 5) SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) S_NOP 0, implicit-def $vgpr40 @@ -182,13 +762,51 @@ body: | ; W32-LABEL: name: other_regs ; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr71 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 512 ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 640 ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr44, 768 ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; W32-NEXT: $m0 = S_MOV_B32 9 ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; 
W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec ; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 @@ -205,13 +823,51 @@ body: | ; W64-LABEL: name: other_regs ; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 1024 ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 1280 ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr44, 1536 ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; W64-NEXT: $m0 = S_MOV_B32 9 ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_offset $vgpr40, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr41 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 ; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec ; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 @@ -240,11 +896,27 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: entry_func - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: entry_func + ; W32: liveins: $sgpr30_sgpr31 + ; W32-NEXT: {{ $}} + ; W32-NEXT: 
frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr42 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr45 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: entry_func + ; W64: liveins: $sgpr30_sgpr31 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr42 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr45 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 S_SETPC_B64_return $sgpr30_sgpr31 ... 
@@ -255,29 +927,121 @@ tracksRegLiveness: true machineFunctionInfo: stackPtrOffsetReg: $sgpr32 body: | - ; CHECK-LABEL: name: multiple_basic_blocks - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit 
$vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; W32-LABEL: name: multiple_basic_blocks + ; W32: bb.0: + ; W32-NEXT: successors: %bb.1(0x80000000) + ; W32-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W32-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W32-NEXT: $m0 = S_MOV_B32 11 + ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec_lo, 32, 0 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec_lo, 32, 32 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W32-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec_lo, 32, 96 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W32-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 + ; W32-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W32-NEXT: S_BRANCH %bb.1 + ; W32-NEXT: {{ $}} + ; W32-NEXT: bb.1: + ; W32-NEXT: successors: %bb.2(0x80000000) + ; W32-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 + ; W32-NEXT: {{ $}} + ; W32-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 + ; W32-NEXT: S_BRANCH %bb.2 + ; W32-NEXT: {{ $}} + ; W32-NEXT: bb.2: + ; W32-NEXT: liveins: $sgpr30_sgpr31, 
$vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W32-NEXT: {{ $}} + ; W32-NEXT: $m0 = S_MOV_B32 11 + ; W32-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + ; + ; W64-LABEL: name: multiple_basic_blocks + ; W64: bb.0: + ; W64-NEXT: successors: %bb.1(0x80000000) + ; W64-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; W64-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr55 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr65 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr66 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr67 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr68 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr69 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr70 + ; W64-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr71 + ; W64-NEXT: $m0 = S_MOV_B32 11 + ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr42, 32, $exec, 64, 0 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr43, 32, $exec, 64, 64 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr44 + ; W64-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_offset $vgpr45, 32, $exec, 64, 192 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr46 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr47 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr56 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr57 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr58 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr59 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr60 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr61 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr62 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr63 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr72 + ; W64-NEXT: frame-setup CFI_INSTRUCTION same_value $vgpr73 + ; 
W64-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 + ; W64-NEXT: S_BRANCH %bb.1 + ; W64-NEXT: {{ $}} + ; W64-NEXT: bb.1: + ; W64-NEXT: successors: %bb.2(0x80000000) + ; W64-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 + ; W64-NEXT: {{ $}} + ; W64-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 + ; W64-NEXT: S_BRANCH %bb.2 + ; W64-NEXT: {{ $}} + ; W64-NEXT: bb.2: + ; W64-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 + ; W64-NEXT: {{ $}} + ; W64-NEXT: $m0 = S_MOV_B32 11 + ; W64-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 bb.0: liveins: $sgpr30_sgpr31, $vgpr44 S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir index 168d63d3a95b9..37c8788d8d691 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir +++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir @@ -20,6 +20,9 @@ body: | ; GCN-LABEL: name: preserve_scratch_vgpr_inactive_lanes ; GCN: liveins: $sgpr35, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll index f4a9e7e8f2759..4b03896043dbb 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll @@ -17,6 +17,13 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: s_mov_b64 exec, -1 ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, s[18:19] +; GFX906-NEXT: v_writelane_b32 v41, s16, 4 +; GFX906-NEXT: v_writelane_b32 v41, s34, 2 +; GFX906-NEXT: v_writelane_b32 v41, s35, 3 +; GFX906-NEXT: s_addk_i32 s32, 0x2800 +; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX906-NEXT: v_writelane_b32 v41, s30, 0 +; GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: s_mov_b32 s21, s15 ; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX906-NEXT: s_mov_b32 s22, s14 @@ -30,17 +37,10 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: v_writelane_b32 v39, s26, 4 ; GFX906-NEXT: v_writelane_b32 v39, s27, 5 ; GFX906-NEXT: v_writelane_b32 v39, s8, 6 -; GFX906-NEXT: v_writelane_b32 v41, s16, 4 ; GFX906-NEXT: v_writelane_b32 v39, s9, 7 -; GFX906-NEXT: v_writelane_b32 v41, s34, 2 ; GFX906-NEXT: v_writelane_b32 v39, s6, 8 -; GFX906-NEXT: v_writelane_b32 v41, s35, 3 ; GFX906-NEXT: v_writelane_b32 v39, s7, 9 -; GFX906-NEXT: v_writelane_b32 v41, s30, 0 ; GFX906-NEXT: v_writelane_b32 v39, s4, 10 -; GFX906-NEXT: s_addk_i32 s32, 0x2800 -; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; 
GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: v_mov_b32_e32 v32, v31 ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX906-NEXT: s_nop 0 @@ -338,8 +338,8 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload ; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; GFX906-NEXT: v_readlane_b32 s30, v41, 0 +; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; GFX906-NEXT: s_mov_b32 s32, s33 ; GFX906-NEXT: v_readlane_b32 s4, v41, 4 ; GFX906-NEXT: v_readlane_b32 s34, v41, 2 @@ -388,21 +388,14 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: s_addk_i32 s32, 0x2c00 ; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 s[16:17], exec -; GFX908-NEXT: s_mov_b64 exec, 1 +; GFX908-NEXT: s_mov_b64 exec, 3 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: v_writelane_b32 v2, s30, 0 +; GFX908-NEXT: v_writelane_b32 v2, s31, 1 ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] -; GFX908-NEXT: s_mov_b64 s[16:17], exec -; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 -; GFX908-NEXT: v_writelane_b32 v2, s31, 0 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 -; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, s[16:17] ; GFX908-NEXT: s_mov_b32 s21, s15 ; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR 
spill to VGPR lane ; GFX908-NEXT: s_mov_b32 s22, s14 @@ -735,20 +728,12 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_readlane_b32 s31, v0, 0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 -; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: s_mov_b64 exec, s[4:5] -; GFX908-NEXT: s_mov_b64 s[4:5], exec -; GFX908-NEXT: s_mov_b64 exec, 1 +; GFX908-NEXT: s_mov_b64 exec, 3 ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s30, v0, 0 +; GFX908-NEXT: v_readlane_b32 s31, v0, 1 ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll index bf417b211826a..ba460fc7b4266 100644 --- a/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll +++ b/llvm/test/CodeGen/AMDGPU/prologue-epilogue-markers.ll @@ -14,6 +14,8 @@ define hidden void @_Z9base_casev() #0 !dbg !6 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 7 3 prologue_end ; file.cpp:7:3 diff --git a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll index 
e29f09dcac024..072f679390e3c 100644 --- a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll @@ -14,6 +14,9 @@ define hidden void @ptr_arg_split_subregs(ptr %arg1) #0 !dbg !9 { ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2562 ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_subregs:a <- [DW_OP_LLVM_fragment 32 32] [$vgpr1+0] ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_subregs:a <- [DW_OP_LLVM_fragment 0 32] [$vgpr0+0] ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -43,6 +46,10 @@ define hidden void @ptr_arg_split_reg_mem(<30 x i32>, ptr %arg2) #0 !dbg !25 { ; CHECK-NEXT: .loc 1 10 0 ; example.cpp:10:0 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2591 ; CHECK-NEXT: ;DEBUG_VALUE: ptr_arg_split_reg_mem:b <- [$vgpr30+0] ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32 @@ -69,6 +76,11 @@ define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) #0 !dbg !31 { ; CHECK-NEXT: .loc 1 15 0 ; example.cpp:15:0 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir index 
e4cbae66d47fa..7f12571a6bdb4 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir @@ -36,6 +36,8 @@ body: | ; GFX908-LABEL: name: regalloc_introduces_s_to_a_copy ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, $vgpr32_vgpr33_vgpr34_vgpr35, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7 ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll index dba10f19eb500..1260e147fbc53 100644 --- a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll +++ b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll @@ -11,8 +11,8 @@ define void @test_remat_s_getpc_b64() { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[4:5] 
; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: ;;#ASMSTART @@ -20,9 +20,9 @@ define void @test_remat_s_getpc_b64() { ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[4:5] @@ -36,17 +36,16 @@ define void @test_remat_s_getpc_b64() { ; GFX11-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v2, s30, 0 +; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v2, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_getpc_b64 s[0:1] -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_readlane_b32 s31, v2, 1 -; GFX11-NEXT: v_readlane_b32 s30, v2, 0 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX11-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload @@ -66,21 +65,21 @@ define void @test_remat_s_getpc_b64() { ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v2, s30, 0 +; GFX12-NEXT: v_writelane_b32 v2, s31, 1 ; GFX12-NEXT: s_getpc_b64 s[0:1] ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_sext_i32_i16 s1, s1 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v2, s31, 1 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_getpc_b64 
s[0:1] +; GFX12-NEXT: v_readlane_b32 s30, v2, 0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_sext_i32_i16 s1, s1 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX12-NEXT: v_readlane_b32 s31, v2, 1 -; GFX12-NEXT: v_readlane_b32 s30, v2, 0 ; GFX12-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX12-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir index 592e0f0cf0c24..9b226df530eec 100644 --- a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir @@ -15,6 +15,12 @@ body: | ; CHECK-LABEL: name: same_slot_agpr_sgpr ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF @@ -48,6 +54,12 @@ body: | ; CHECK-LABEL: name: diff_slot_agpr_sgpr ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr0 
= V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF @@ -79,6 +91,10 @@ body: | ; CHECK-LABEL: name: dead_vgpr_slot ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; CHECK-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir index 520717391b596..2f6c628d290ea 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir @@ -59,6 +59,8 @@ body: | ; PEI: bb.0: ; PEI-NEXT: successors: %bb.1(0x80000000) ; PEI-NEXT: {{ $}} + ; PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-NEXT: renamable $sgpr10 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index 925984b15367d..b71a9eeef208e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -28,86 +28,203 @@ body: | ; GCN-LABEL: name: test_main ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, 
$sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0 + ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr33 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr34 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr35 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr36 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr37 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr38 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr39 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr51 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 ; GCN-NEXT: $vcc_hi = frame-setup COPY $sgpr33 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, 
implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr1, 0 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 128 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr3, 256 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr4, 384 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr5, 512 ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 0x04, 0x35, 0x24, 0x36, 
0xe9, 0x02 ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr4, $vgpr255, 0, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr5, $vgpr255, 1, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr6, $vgpr255, 2, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr7, $vgpr255, 3, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr8, $vgpr255, 4, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr9, $vgpr255, 5, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr10, $vgpr255, 6, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr11, $vgpr255, 7, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr12, $vgpr255, 8, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr13, $vgpr255, 9, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr14, $vgpr255, 10, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr15, $vgpr255, 11, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
llvm_vector_registers $sgpr16, $vgpr255, 12, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr17, $vgpr255, 13, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr18, $vgpr255, 14, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr19, $vgpr255, 15, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr20, $vgpr255, 16, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr21, $vgpr255, 17, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr22, $vgpr255, 18, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr23, $vgpr255, 19, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr24, $vgpr255, 20, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr25, $vgpr255, 21, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr26, $vgpr255, 22, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr27, $vgpr255, 23, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr28, $vgpr255, 24, 32 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr2 - 
; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr2 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr3 - ; GCN-NEXT: $vgpr3 = 
SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr3 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr29, $vgpr255, 25, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 26, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr64, $vgpr255, 26, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 27, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr65, $vgpr255, 27, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 28, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr66, $vgpr255, 28, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 29, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr67, $vgpr255, 29, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 30, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr68, $vgpr255, 30, 32 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 31, $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr69, $vgpr255, 31, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 0, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr70, $vgpr254, 0, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 1, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr71, $vgpr254, 1, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 2, $vgpr3 + 
; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr72, $vgpr254, 2, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 3, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr73, $vgpr254, 3, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 4, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr74, $vgpr254, 4, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 5, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr75, $vgpr254, 5, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 6, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr76, $vgpr254, 6, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 7, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr77, $vgpr254, 7, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 8, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr78, $vgpr254, 8, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 9, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr79, $vgpr254, 9, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 10, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr80, $vgpr254, 10, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 11, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr81, $vgpr254, 11, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 12, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr82, $vgpr254, 12, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 13, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr83, $vgpr254, 13, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 14, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr84, $vgpr254, 14, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 15, $vgpr3 + ; GCN-NEXT: 
frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr85, $vgpr254, 15, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 16, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr86, $vgpr254, 16, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 17, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr87, $vgpr254, 17, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 18, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr88, $vgpr254, 18, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 19, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr89, $vgpr254, 19, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 20, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr90, $vgpr254, 20, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 21, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr91, $vgpr254, 21, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 22, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr92, $vgpr254, 22, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 23, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr93, $vgpr254, 23, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 24, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr94, $vgpr254, 24, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 25, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr95, $vgpr254, 25, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 26, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr96, $vgpr254, 26, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 27, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr97, $vgpr254, 27, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 28, $vgpr3 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr98, $vgpr254, 28, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 29, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr99, $vgpr254, 29, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 30, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr100, $vgpr254, 30, 32 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 31, $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr101, $vgpr254, 31, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 0, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr102, $vgpr253, 0, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 1, $vgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr103, $vgpr253, 1, 32 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr30, 2, $vgpr4, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr31, 3, $vgpr4, implicit $sgpr30_sgpr31 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr253, 2, 32, $vgpr253, 3, 32 ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF ; GCN-NEXT: $vgpr5 = IMPLICIT_DEF ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5 @@ -130,48 +247,48 @@ body: | ; GCN-NEXT: bb.3: ; GCN-NEXT: liveins: $vcc_hi ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 - ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2 - ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 - ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 - ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 - ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 - ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 - ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 - ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 - ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR 
$vgpr3, 26 - ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 - ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 - ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 - ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 - ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 - ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 - ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 - ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18 - ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 - ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 - ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 - ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 - ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 - ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 - ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 - ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 - ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 - ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 - ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 - ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 - ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 - ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 - ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 - ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 - ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 - ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 - ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31 - ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30 - ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29 - ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28 - ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27 - ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26 + ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2, implicit-def 
$sgpr30_sgpr31 + ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 + ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 + ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 + ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 + ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 + ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 + ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 + ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 + ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26 + ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 + ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 + ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 + ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 + ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 + ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 + ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 + ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18 + ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 + ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 + ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 + ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 + ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 + ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 + ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 + ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 + ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 + ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 + ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 + ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 + ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 + ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 + ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 + ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 + ; 
GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 + ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 + ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31 + ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30 + ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29 + ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28 + ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27 + ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26 ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25 ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24 ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23 @@ -200,12 +317,13 @@ body: | ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 ; GCN-NEXT: $sgpr32 = frame-destroy COPY $sgpr33 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) - ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) - ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) - ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) - ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5) + ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5) + ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) + ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from 
%stack.70, addrspace 5) + ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) + ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 ; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi ; GCN-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir index 59c4b715dd12e..09e25075e51c5 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -23,6 +23,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -73,6 +75,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 @@ -122,6 +126,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ 
$}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -170,6 +177,10 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 @@ -220,6 +231,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -273,6 +348,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -329,6 +468,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3 @@ -383,6 +586,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 3 @@ -443,6 +710,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def 
$scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 @@ -507,6 +838,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir index cac9c85130a7b..a1fc683679f9d 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -25,6 +25,9 @@ body: | ; CHECK-LABEL: name: test ; CHECK: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir index ba2e80fdc04c8..92c4249b26069 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -58,6 +58,8 @@ body: | ; GCN64-MUBUF-LABEL: name: check_spill ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN64-MUBUF-NEXT: {{ $}} + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; 
GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-MUBUF-NEXT: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 @@ -222,6 +224,8 @@ body: | ; GCN32-MUBUF-LABEL: name: check_spill ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN32-MUBUF-NEXT: {{ $}} + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN32-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN32-MUBUF-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -386,6 +390,8 @@ body: | ; GCN64-FLATSCR-LABEL: name: check_spill ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 ; GCN64-FLATSCR-NEXT: {{ $}} + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc @@ -617,6 +623,8 @@ body: | ; GCN64-MUBUF-LABEL: name: check_reload ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN64-MUBUF-NEXT: {{ $}} + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-MUBUF-NEXT: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def 
$sgpr28_sgpr29_sgpr30_sgpr31 @@ -755,6 +763,8 @@ body: | ; GCN32-MUBUF-LABEL: name: check_reload ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 ; GCN32-MUBUF-NEXT: {{ $}} + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN32-MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN32-MUBUF-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN32-MUBUF-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -893,6 +903,8 @@ body: | ; GCN64-FLATSCR-LABEL: name: check_reload ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1 ; GCN64-FLATSCR-NEXT: {{ $}} + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN64-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index 702953c56a5cb..cb54b0ba629c3 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -152,8 +152,8 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v255, 1 ; GCN-NEXT: v_readlane_b32 s30, v255, 0 +; GCN-NEXT: v_readlane_b32 s31, v255, 1 ; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 
offset:8 ; 4-byte Folded Reload @@ -445,8 +445,8 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v254, 1 ; GCN-NEXT: v_readlane_b32 s30, v254, 0 +; GCN-NEXT: v_readlane_b32 s31, v254, 1 ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -1632,21 +1632,14 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 1 +; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN-NEXT: v_writelane_b32 v0, s31, 1 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: v_writelane_b32 v0, s31, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra@rel32@hi+12 @@ -1656,20 +1649,12 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: 
s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v0, 0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 1 +; GCN-NEXT: s_mov_b64 exec, 3 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s30, v0, 0 +; GCN-NEXT: v_readlane_b32 s31, v0, 1 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_mov_b64 exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index 7ee7c83e0122d..7feef49839ed5 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -14689,22 +14689,22 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s30 ; GFX900-NEXT: s_mov_b32 s9, s31 +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; 
GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14718,22 +14718,22 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s30 ; GFX90A-NEXT: s_mov_b32 s9, s31 +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -14829,22 +14829,22 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s30 ; GFX900-NEXT: s_mov_b32 s9, s31 +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; 
GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14858,22 +14858,22 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s30 ; GFX90A-NEXT: s_mov_b32 s9, s31 +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -14887,22 +14887,23 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s30 ; GFX942-NEXT: s_mov_b32 s9, s31 -; GFX942-NEXT: s_mov_b32 s10, s12 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 
s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -14923,22 +14924,22 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s30 ; GFX900-NEXT: s_mov_b32 s13, s31 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -14952,22 +14953,22 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s30 ; GFX90A-NEXT: s_mov_b32 s13, s31 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; 
GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -15087,6 +15088,7 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -15096,13 +15098,13 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s30 ; GFX942-NEXT: s_mov_b32 s9, s31 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -15129,10 +15131,10 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -15170,10 +15172,10 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -15205,22 +15207,23 @@ define void 
@s_shuffle_v2i64_v8i64__15_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s30 ; GFX942-NEXT: s_mov_b32 s13, s31 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -16255,6 +16258,7 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -16263,12 +16267,12 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s10, s16 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s17 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -16978,6 +16982,7 @@ define void @s_shuffle_v2i64_v8i64__12_1() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded 
Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -16986,12 +16991,12 @@ define void @s_shuffle_v2i64_v8i64__12_1() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s10, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s11, s19 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -17562,13 +17567,14 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17577,7 +17583,6 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17591,13 +17596,14 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: 
v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17606,7 +17612,6 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17653,13 +17658,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s20 ; GFX900-NEXT: s_mov_b32 s11, s21 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17680,13 +17685,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s20 ; GFX90A-NEXT: s_mov_b32 s11, s21 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17700,6 +17705,7 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX942-NEXT: 
scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -17708,13 +17714,13 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s20 ; GFX942-NEXT: s_mov_b32 s11, s21 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -17735,13 +17741,14 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17750,7 +17757,6 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17764,13 +17770,14 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 
exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17779,7 +17786,6 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -17879,13 +17885,14 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -17894,7 +17901,6 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -17908,13 +17914,14 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: 
v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -17923,7 +17930,6 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -18403,13 +18409,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s10, s22 ; GFX900-NEXT: s_mov_b32 s11, s23 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -18430,13 +18436,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s10, s22 ; GFX90A-NEXT: s_mov_b32 s11, s23 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -18450,6 +18456,7 @@ define void 
@s_shuffle_v2i64_v8i64__10_3() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -18458,13 +18465,13 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s10, s22 ; GFX942-NEXT: s_mov_b32 s11, s23 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19031,13 +19038,14 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19046,7 +19054,6 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19060,13 +19067,14 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 
4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19075,7 +19083,6 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19089,22 +19096,23 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19181,13 +19189,14 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], 
s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19196,7 +19205,6 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19210,13 +19218,14 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19225,7 +19234,6 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19239,22 +19247,23 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; 
GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19275,22 +19284,22 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s26, s12 ; GFX900-NEXT: s_mov_b32 s27, s13 ; GFX900-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19304,22 +19313,22 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], 
s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s26, s12 ; GFX90A-NEXT: s_mov_b32 s27, s13 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19357,13 +19366,14 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -19372,7 +19382,6 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19386,13 +19395,14 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte 
Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -19401,7 +19411,6 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19415,22 +19424,23 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s27 -; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -19451,10 +19461,10 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 
; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -19462,11 +19472,11 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX900-NEXT: s_mov_b32 s31, s13 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19480,10 +19490,10 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -19491,11 +19501,11 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX90A-NEXT: s_mov_b32 s31, s13 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -19952,22 +19962,22 @@ define void 
@s_shuffle_v2i64_v8i64__9_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s18 ; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -19981,22 +19991,22 @@ define void @s_shuffle_v2i64_v8i64__9_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s18 ; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20090,22 +20100,22 @@ define void 
@s_shuffle_v2i64_v8i64__11_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s22 ; GFX900-NEXT: s_mov_b32 s13, s23 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20119,22 +20129,22 @@ define void @s_shuffle_v2i64_v8i64__11_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s22 ; GFX90A-NEXT: s_mov_b32 s13, s23 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20172,22 +20182,22 @@ define void 
@s_shuffle_v2i64_v8i64__12_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s26, s14 ; GFX900-NEXT: s_mov_b32 s27, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20201,22 +20211,22 @@ define void @s_shuffle_v2i64_v8i64__12_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s26, s14 ; GFX90A-NEXT: s_mov_b32 s27, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20254,22 +20264,22 @@ define void 
@s_shuffle_v2i64_v8i64__13_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s26 ; GFX900-NEXT: s_mov_b32 s13, s27 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20283,22 +20293,22 @@ define void @s_shuffle_v2i64_v8i64__13_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s26 ; GFX90A-NEXT: s_mov_b32 s13, s27 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20336,10 +20346,10 @@ define void 
@s_shuffle_v2i64_v8i64__14_5() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -20347,11 +20357,11 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX900-NEXT: s_mov_b32 s31, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -20365,10 +20375,10 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -20376,11 +20386,11 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX90A-NEXT: s_mov_b32 s31, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; 
GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -20934,14 +20944,16 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -20949,7 +20961,6 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21108,14 +21119,16 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s22 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s23 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -21123,7 +21136,6 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: 
s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21332,14 +21344,16 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s26 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s27 ; GFX942-NEXT: s_mov_b32 s10, s12 ; GFX942-NEXT: s_mov_b32 s11, s13 @@ -21347,7 +21361,6 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21450,6 +21463,7 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -21461,11 +21475,11 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX942-NEXT: s_mov_b32 s31, s13 ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: 
scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -21918,10 +21932,10 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -21959,10 +21973,10 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -21994,22 +22008,23 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s18 ; GFX942-NEXT: s_mov_b32 s13, s19 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ 
-22092,10 +22107,10 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22133,10 +22148,10 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22168,22 +22183,23 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s22 ; GFX942-NEXT: s_mov_b32 s13, s23 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -22316,10 +22332,10 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: v_writelane_b32 
v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22357,10 +22373,10 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22392,22 +22408,23 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s12, s26 ; GFX942-NEXT: s_mov_b32 s13, s27 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -22510,6 +22527,7 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: 
v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] @@ -22521,11 +22539,11 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX942-NEXT: s_mov_b32 s31, s15 ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -23512,22 +23530,22 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s18 ; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -23541,22 +23559,22 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; 
GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s18 ; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -23601,13 +23619,13 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:23] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s26 ; GFX900-NEXT: s_mov_b32 s9, s27 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -23628,13 +23646,13 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:23] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s26 ; GFX90A-NEXT: s_mov_b32 s9, s27 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -23648,6 +23666,7 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded 
Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -23656,13 +23675,13 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s8, s26 ; GFX942-NEXT: s_mov_b32 s9, s27 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -23689,10 +23708,10 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -23730,10 +23749,10 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -23765,22 +23784,23 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s18 ; GFX942-NEXT: s_mov_b32 s15, s19 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -24362,22 +24382,22 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s20 ; GFX900-NEXT: s_mov_b32 s15, s21 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -24391,22 +24411,22 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: 
; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s20 ; GFX90A-NEXT: s_mov_b32 s15, s21 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -24444,13 +24464,14 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s20 @@ -24459,7 +24480,6 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -24473,13 +24493,14 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; 
GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s20 @@ -24488,7 +24509,6 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -24533,10 +24553,10 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -24574,10 +24594,10 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -24609,22 +24629,23 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s20 ; GFX942-NEXT: s_mov_b32 s15, s21 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -24727,14 +24748,16 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s20 ; GFX942-NEXT: s_mov_b32 s11, s21 @@ -24742,7 +24765,6 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -25323,13 +25345,13 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; 
GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -25350,13 +25372,13 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -25370,6 +25392,7 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -25378,13 +25401,13 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s8, s22 ; GFX942-NEXT: s_mov_b32 s9, s23 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -25405,22 +25428,22 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART 
; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s22 ; GFX900-NEXT: s_mov_b32 s15, s23 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -25434,22 +25457,22 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s22 ; GFX90A-NEXT: s_mov_b32 s15, s23 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -25549,10 +25572,10 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; 
GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -25590,10 +25613,10 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -25625,22 +25648,23 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s22 ; GFX942-NEXT: s_mov_b32 s15, s23 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26222,22 +26246,22 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s24 ; GFX900-NEXT: s_mov_b32 s15, s25 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -26251,22 +26275,22 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s24 ; GFX90A-NEXT: s_mov_b32 s15, s25 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -26304,13 +26328,14 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s24 @@ -26319,7 +26344,6 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -26333,13 +26357,14 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s24 @@ -26348,7 +26373,6 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -26393,10 +26417,10 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; 
GFX900-NEXT: ;;#ASMEND @@ -26434,10 +26458,10 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -26469,22 +26493,23 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s24 ; GFX942-NEXT: s_mov_b32 s15, s25 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26587,14 +26612,16 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; 
GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s24 ; GFX942-NEXT: s_mov_b32 s11, s25 @@ -26602,7 +26629,6 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -26968,6 +26994,7 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -26976,12 +27003,12 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s18 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s19 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -27118,22 +27145,22 @@ define void @s_shuffle_v2i64_v8i64__4_13() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s26 ; GFX900-NEXT: s_mov_b32 s15, s27 ; GFX900-NEXT: s_mov_b64 
s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -27147,22 +27174,22 @@ define void @s_shuffle_v2i64_v8i64__4_13() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s26 ; GFX90A-NEXT: s_mov_b32 s15, s27 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -27200,22 +27227,22 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s24, s14 ; GFX900-NEXT: s_mov_b32 s25, s15 ; GFX900-NEXT: s_mov_b64 
s[8:9], s[24:25] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -27229,22 +27256,22 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s24, s14 ; GFX90A-NEXT: s_mov_b32 s25, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[24:25] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -27288,10 +27315,10 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -27329,10 +27356,10 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: 
v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -27364,22 +27391,23 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s26 ; GFX942-NEXT: s_mov_b32 s15, s27 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -28075,22 +28103,22 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s28 ; GFX900-NEXT: s_mov_b32 s15, s29 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -28104,22 +28132,22 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s28 ; GFX90A-NEXT: s_mov_b32 s15, s29 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -28157,13 +28185,14 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s28 @@ -28172,7 +28201,6 @@ define void 
@s_shuffle_v2i64_v8i64__5_14() { ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -28186,13 +28214,14 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s28 @@ -28201,7 +28230,6 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -28246,10 +28274,10 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -28287,10 +28315,10 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 
7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -28322,22 +28350,23 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s28 ; GFX942-NEXT: s_mov_b32 s15, s29 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -28440,14 +28469,16 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s8, s14 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b32 s9, s15 ; GFX942-NEXT: s_mov_b32 s10, s28 ; GFX942-NEXT: s_mov_b32 s11, s29 @@ -28455,7 +28486,6 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX942-NEXT: ; use 
s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -29056,22 +29086,22 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s14, s30 ; GFX900-NEXT: s_mov_b32 s15, s31 ; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -29085,22 +29115,22 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s14, s30 ; GFX90A-NEXT: s_mov_b32 s15, s31 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; 
GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -29138,10 +29168,10 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s30, 0 +; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s31, 1 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:31] ; GFX900-NEXT: ;;#ASMEND @@ -29149,11 +29179,11 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX900-NEXT: s_mov_b32 s29, s15 ; GFX900-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX900-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s31, v0, 1 -; GFX900-NEXT: v_readlane_b32 s30, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] @@ -29167,10 +29197,10 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s30, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:31] ; GFX90A-NEXT: ;;#ASMEND @@ -29178,11 +29208,11 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX90A-NEXT: s_mov_b32 s29, s15 ; GFX90A-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX90A-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX90A-NEXT: v_readlane_b32 
s30, v0, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] @@ -29228,10 +29258,10 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -29269,10 +29299,10 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -29304,22 +29334,23 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_mov_b32 s14, s30 ; GFX942-NEXT: s_mov_b32 s15, s31 ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; 
GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] @@ -29422,6 +29453,7 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill ; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] @@ -29433,11 +29465,11 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX942-NEXT: s_mov_b32 s29, s15 ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload ; GFX942-NEXT: s_mov_b64 exec, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir index 1ffef8e60d90d..9ebf4f57ed7d3 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir @@ -24,10 +24,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit 
$sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 @@ -89,10 +90,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3 @@ -152,10 +154,11 @@ machineFunctionInfo: body: | bb.0: ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first - ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 + ; SGPR_SPILLED: liveins: $vgpr62, $sgpr30_sgpr31 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 - ; SGPR_SPILLED-NEXT: $vgpr62 = 
SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr62, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62, implicit killed $sgpr30_sgpr31 + ; SGPR_SPILLED-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $pc_reg, $vgpr62, 0, 32, $vgpr62, 1, 32 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir index 2de7d86223eb2..2f769d94f174d 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills.mir @@ -2,9 +2,14 @@ # CHECK-LABEL: name: empty_entry_block # CHECK: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers # CHECK-NEXT: SI_SPILL_S32_TO_VGPR +# CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers + # CHECK: SI_RESTORE_S32_FROM_VGPR # CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR # CHECK-NEXT: SI_RESTORE_S32_FROM_VGPR diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll index 761ff7786b98e..3419cb3d76320 100644 --- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll +++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll @@ -9,6 +9,15 @@ define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_ev ; GCN-NEXT: .cfi_sections .debug_frame ; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: ; %bb +; 
GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 2560 +; GCN-NEXT: .cfi_undefined 2561 +; GCN-NEXT: .cfi_undefined 2562 +; GCN-NEXT: .cfi_undefined 2563 +; GCN-NEXT: .cfi_undefined 2564 +; GCN-NEXT: .cfi_undefined 36 +; GCN-NEXT: .cfi_undefined 37 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 00214ef36e1f0..98048e7ace538 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -231,19 +231,19 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_byval_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_byval_i32@rel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -382,14 +382,15 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte 
Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s4, 2 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 @@ -422,11 +423,10 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: v_mov_b32_e32 v28, 0 ; GCN-NEXT: v_mov_b32_e32 v29, 0 ; GCN-NEXT: v_mov_b32_e32 v30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -450,16 +450,16 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v42, s4, 2 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v42, s30, 0 +; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, 
i32_fastcc_i32_i32@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: v_writelane_b32 v42, s30, 0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v42, s31, 1 ; GCN-NEXT: v_mov_b32_e32 v40, v1 ; GCN-NEXT: v_mov_b32_e32 v41, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -469,11 +469,11 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3 ; GCN-NEXT: v_mov_b32_e32 v1, v40 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: v_readlane_b32 s30, v42, 0 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 ; GCN-NEXT: v_readlane_b32 s31, v42, 1 -; GCN-NEXT: v_readlane_b32 s30, v42, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s6, v42, 2 ; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 @@ -603,23 +603,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; FIJI-NEXT: s_mov_b64 exec, s[18:19] ; FIJI-NEXT: v_writelane_b32 v40, s16, 18 -; FIJI-NEXT: v_writelane_b32 v40, s30, 0 -; FIJI-NEXT: v_writelane_b32 v40, s31, 1 -; FIJI-NEXT: v_writelane_b32 v40, s34, 2 -; FIJI-NEXT: v_writelane_b32 v40, s35, 3 -; FIJI-NEXT: v_writelane_b32 v40, s36, 4 -; FIJI-NEXT: v_writelane_b32 v40, s37, 5 -; FIJI-NEXT: v_writelane_b32 v40, s38, 6 -; FIJI-NEXT: v_writelane_b32 v40, s39, 7 -; FIJI-NEXT: v_writelane_b32 v40, s48, 8 -; FIJI-NEXT: v_writelane_b32 v40, s49, 9 -; FIJI-NEXT: v_writelane_b32 v40, s50, 10 -; FIJI-NEXT: v_writelane_b32 v40, s51, 11 -; FIJI-NEXT: v_writelane_b32 v40, s52, 12 -; FIJI-NEXT: v_writelane_b32 v40, s53, 13 -; FIJI-NEXT: 
v_writelane_b32 v40, s54, 14 -; FIJI-NEXT: v_writelane_b32 v40, s55, 15 -; FIJI-NEXT: v_writelane_b32 v40, s64, 16 +; FIJI-NEXT: s_addk_i32 s32, 0x400 +; FIJI-NEXT: v_writelane_b32 v40, s34, 0 +; FIJI-NEXT: v_writelane_b32 v40, s35, 1 +; FIJI-NEXT: v_writelane_b32 v40, s36, 2 +; FIJI-NEXT: v_writelane_b32 v40, s37, 3 +; FIJI-NEXT: v_writelane_b32 v40, s38, 4 +; FIJI-NEXT: v_writelane_b32 v40, s39, 5 +; FIJI-NEXT: v_writelane_b32 v40, s48, 6 +; FIJI-NEXT: v_writelane_b32 v40, s49, 7 +; FIJI-NEXT: v_writelane_b32 v40, s50, 8 +; FIJI-NEXT: v_writelane_b32 v40, s51, 9 +; FIJI-NEXT: v_writelane_b32 v40, s52, 10 +; FIJI-NEXT: v_writelane_b32 v40, s53, 11 +; FIJI-NEXT: v_writelane_b32 v40, s54, 12 +; FIJI-NEXT: v_writelane_b32 v40, s55, 13 +; FIJI-NEXT: v_writelane_b32 v40, s64, 14 +; FIJI-NEXT: v_writelane_b32 v40, s65, 15 +; FIJI-NEXT: v_writelane_b32 v40, s30, 16 +; FIJI-NEXT: v_writelane_b32 v40, s31, 17 ; FIJI-NEXT: s_mov_b32 s50, s15 ; FIJI-NEXT: s_mov_b32 s51, s14 ; FIJI-NEXT: s_mov_b32 s52, s13 @@ -630,8 +632,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5] ; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4 ; FIJI-NEXT: s_mov_b64 s[54:55], exec -; FIJI-NEXT: s_addk_i32 s32, 0x400 -; FIJI-NEXT: v_writelane_b32 v40, s65, 17 ; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; FIJI-NEXT: v_readfirstlane_b32 s16, v0 ; FIJI-NEXT: v_readfirstlane_b32 s17, v1 @@ -657,25 +657,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_cbranch_execnz .LBB18_1 ; FIJI-NEXT: ; %bb.2: ; FIJI-NEXT: s_mov_b64 exec, s[54:55] +; FIJI-NEXT: v_readlane_b32 s30, v40, 16 ; FIJI-NEXT: v_mov_b32_e32 v0, v4 -; FIJI-NEXT: v_readlane_b32 s65, v40, 17 -; FIJI-NEXT: v_readlane_b32 s64, v40, 16 -; FIJI-NEXT: v_readlane_b32 s55, v40, 15 -; FIJI-NEXT: v_readlane_b32 s54, v40, 14 -; FIJI-NEXT: v_readlane_b32 s53, v40, 13 -; FIJI-NEXT: v_readlane_b32 s52, v40, 12 -; FIJI-NEXT: 
v_readlane_b32 s51, v40, 11 -; FIJI-NEXT: v_readlane_b32 s50, v40, 10 -; FIJI-NEXT: v_readlane_b32 s49, v40, 9 -; FIJI-NEXT: v_readlane_b32 s48, v40, 8 -; FIJI-NEXT: v_readlane_b32 s39, v40, 7 -; FIJI-NEXT: v_readlane_b32 s38, v40, 6 -; FIJI-NEXT: v_readlane_b32 s37, v40, 5 -; FIJI-NEXT: v_readlane_b32 s36, v40, 4 -; FIJI-NEXT: v_readlane_b32 s35, v40, 3 -; FIJI-NEXT: v_readlane_b32 s34, v40, 2 -; FIJI-NEXT: v_readlane_b32 s31, v40, 1 -; FIJI-NEXT: v_readlane_b32 s30, v40, 0 +; FIJI-NEXT: v_readlane_b32 s31, v40, 17 +; FIJI-NEXT: v_readlane_b32 s65, v40, 15 +; FIJI-NEXT: v_readlane_b32 s64, v40, 14 +; FIJI-NEXT: v_readlane_b32 s55, v40, 13 +; FIJI-NEXT: v_readlane_b32 s54, v40, 12 +; FIJI-NEXT: v_readlane_b32 s53, v40, 11 +; FIJI-NEXT: v_readlane_b32 s52, v40, 10 +; FIJI-NEXT: v_readlane_b32 s51, v40, 9 +; FIJI-NEXT: v_readlane_b32 s50, v40, 8 +; FIJI-NEXT: v_readlane_b32 s49, v40, 7 +; FIJI-NEXT: v_readlane_b32 s48, v40, 6 +; FIJI-NEXT: v_readlane_b32 s39, v40, 5 +; FIJI-NEXT: v_readlane_b32 s38, v40, 4 +; FIJI-NEXT: v_readlane_b32 s37, v40, 3 +; FIJI-NEXT: v_readlane_b32 s36, v40, 2 +; FIJI-NEXT: v_readlane_b32 s35, v40, 1 +; FIJI-NEXT: v_readlane_b32 s34, v40, 0 ; FIJI-NEXT: s_mov_b32 s32, s33 ; FIJI-NEXT: v_readlane_b32 s4, v40, 18 ; FIJI-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -694,23 +694,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; HAWAII-NEXT: s_mov_b64 exec, s[18:19] ; HAWAII-NEXT: v_writelane_b32 v40, s16, 18 -; HAWAII-NEXT: v_writelane_b32 v40, s30, 0 -; HAWAII-NEXT: v_writelane_b32 v40, s31, 1 -; HAWAII-NEXT: v_writelane_b32 v40, s34, 2 -; HAWAII-NEXT: v_writelane_b32 v40, s35, 3 -; HAWAII-NEXT: v_writelane_b32 v40, s36, 4 -; HAWAII-NEXT: v_writelane_b32 v40, s37, 5 -; HAWAII-NEXT: v_writelane_b32 v40, s38, 6 -; HAWAII-NEXT: v_writelane_b32 v40, s39, 7 -; HAWAII-NEXT: v_writelane_b32 v40, s48, 8 -; HAWAII-NEXT: v_writelane_b32 v40, 
s49, 9 -; HAWAII-NEXT: v_writelane_b32 v40, s50, 10 -; HAWAII-NEXT: v_writelane_b32 v40, s51, 11 -; HAWAII-NEXT: v_writelane_b32 v40, s52, 12 -; HAWAII-NEXT: v_writelane_b32 v40, s53, 13 -; HAWAII-NEXT: v_writelane_b32 v40, s54, 14 -; HAWAII-NEXT: v_writelane_b32 v40, s55, 15 -; HAWAII-NEXT: v_writelane_b32 v40, s64, 16 +; HAWAII-NEXT: s_addk_i32 s32, 0x400 +; HAWAII-NEXT: v_writelane_b32 v40, s34, 0 +; HAWAII-NEXT: v_writelane_b32 v40, s35, 1 +; HAWAII-NEXT: v_writelane_b32 v40, s36, 2 +; HAWAII-NEXT: v_writelane_b32 v40, s37, 3 +; HAWAII-NEXT: v_writelane_b32 v40, s38, 4 +; HAWAII-NEXT: v_writelane_b32 v40, s39, 5 +; HAWAII-NEXT: v_writelane_b32 v40, s48, 6 +; HAWAII-NEXT: v_writelane_b32 v40, s49, 7 +; HAWAII-NEXT: v_writelane_b32 v40, s50, 8 +; HAWAII-NEXT: v_writelane_b32 v40, s51, 9 +; HAWAII-NEXT: v_writelane_b32 v40, s52, 10 +; HAWAII-NEXT: v_writelane_b32 v40, s53, 11 +; HAWAII-NEXT: v_writelane_b32 v40, s54, 12 +; HAWAII-NEXT: v_writelane_b32 v40, s55, 13 +; HAWAII-NEXT: v_writelane_b32 v40, s64, 14 +; HAWAII-NEXT: v_writelane_b32 v40, s65, 15 +; HAWAII-NEXT: v_writelane_b32 v40, s30, 16 +; HAWAII-NEXT: v_writelane_b32 v40, s31, 17 ; HAWAII-NEXT: s_mov_b32 s50, s15 ; HAWAII-NEXT: s_mov_b32 s51, s14 ; HAWAII-NEXT: s_mov_b32 s52, s13 @@ -721,8 +723,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5] ; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4 ; HAWAII-NEXT: s_mov_b64 s[54:55], exec -; HAWAII-NEXT: s_addk_i32 s32, 0x400 -; HAWAII-NEXT: v_writelane_b32 v40, s65, 17 ; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; HAWAII-NEXT: v_readfirstlane_b32 s16, v0 ; HAWAII-NEXT: v_readfirstlane_b32 s17, v1 @@ -748,25 +748,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_cbranch_execnz .LBB18_1 ; HAWAII-NEXT: ; %bb.2: ; HAWAII-NEXT: s_mov_b64 exec, s[54:55] +; HAWAII-NEXT: v_readlane_b32 s30, v40, 16 ; HAWAII-NEXT: 
v_mov_b32_e32 v0, v4 -; HAWAII-NEXT: v_readlane_b32 s65, v40, 17 -; HAWAII-NEXT: v_readlane_b32 s64, v40, 16 -; HAWAII-NEXT: v_readlane_b32 s55, v40, 15 -; HAWAII-NEXT: v_readlane_b32 s54, v40, 14 -; HAWAII-NEXT: v_readlane_b32 s53, v40, 13 -; HAWAII-NEXT: v_readlane_b32 s52, v40, 12 -; HAWAII-NEXT: v_readlane_b32 s51, v40, 11 -; HAWAII-NEXT: v_readlane_b32 s50, v40, 10 -; HAWAII-NEXT: v_readlane_b32 s49, v40, 9 -; HAWAII-NEXT: v_readlane_b32 s48, v40, 8 -; HAWAII-NEXT: v_readlane_b32 s39, v40, 7 -; HAWAII-NEXT: v_readlane_b32 s38, v40, 6 -; HAWAII-NEXT: v_readlane_b32 s37, v40, 5 -; HAWAII-NEXT: v_readlane_b32 s36, v40, 4 -; HAWAII-NEXT: v_readlane_b32 s35, v40, 3 -; HAWAII-NEXT: v_readlane_b32 s34, v40, 2 -; HAWAII-NEXT: v_readlane_b32 s31, v40, 1 -; HAWAII-NEXT: v_readlane_b32 s30, v40, 0 +; HAWAII-NEXT: v_readlane_b32 s31, v40, 17 +; HAWAII-NEXT: v_readlane_b32 s65, v40, 15 +; HAWAII-NEXT: v_readlane_b32 s64, v40, 14 +; HAWAII-NEXT: v_readlane_b32 s55, v40, 13 +; HAWAII-NEXT: v_readlane_b32 s54, v40, 12 +; HAWAII-NEXT: v_readlane_b32 s53, v40, 11 +; HAWAII-NEXT: v_readlane_b32 s52, v40, 10 +; HAWAII-NEXT: v_readlane_b32 s51, v40, 9 +; HAWAII-NEXT: v_readlane_b32 s50, v40, 8 +; HAWAII-NEXT: v_readlane_b32 s49, v40, 7 +; HAWAII-NEXT: v_readlane_b32 s48, v40, 6 +; HAWAII-NEXT: v_readlane_b32 s39, v40, 5 +; HAWAII-NEXT: v_readlane_b32 s38, v40, 4 +; HAWAII-NEXT: v_readlane_b32 s37, v40, 3 +; HAWAII-NEXT: v_readlane_b32 s36, v40, 2 +; HAWAII-NEXT: v_readlane_b32 s35, v40, 1 +; HAWAII-NEXT: v_readlane_b32 s34, v40, 0 ; HAWAII-NEXT: s_mov_b32 s32, s33 ; HAWAII-NEXT: v_readlane_b32 s4, v40, 18 ; HAWAII-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -785,23 +785,25 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-NEXT: v_writelane_b32 v40, s16, 18 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, 
s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s48, 8 -; GFX9-NEXT: v_writelane_b32 v40, s49, 9 -; GFX9-NEXT: v_writelane_b32 v40, s50, 10 -; GFX9-NEXT: v_writelane_b32 v40, s51, 11 -; GFX9-NEXT: v_writelane_b32 v40, s52, 12 -; GFX9-NEXT: v_writelane_b32 v40, s53, 13 -; GFX9-NEXT: v_writelane_b32 v40, s54, 14 -; GFX9-NEXT: v_writelane_b32 v40, s55, 15 -; GFX9-NEXT: v_writelane_b32 v40, s64, 16 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s48, 6 +; GFX9-NEXT: v_writelane_b32 v40, s49, 7 +; GFX9-NEXT: v_writelane_b32 v40, s50, 8 +; GFX9-NEXT: v_writelane_b32 v40, s51, 9 +; GFX9-NEXT: v_writelane_b32 v40, s52, 10 +; GFX9-NEXT: v_writelane_b32 v40, s53, 11 +; GFX9-NEXT: v_writelane_b32 v40, s54, 12 +; GFX9-NEXT: v_writelane_b32 v40, s55, 13 +; GFX9-NEXT: v_writelane_b32 v40, s64, 14 +; GFX9-NEXT: v_writelane_b32 v40, s65, 15 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: s_mov_b32 s50, s15 ; GFX9-NEXT: s_mov_b32 s51, s14 ; GFX9-NEXT: s_mov_b32 s52, s13 @@ -812,8 +814,6 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5] ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 ; GFX9-NEXT: s_mov_b64 s[54:55], exec -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s65, 17 ; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_readfirstlane_b32 s16, v0 ; GFX9-NEXT: v_readfirstlane_b32 s17, v1 @@ -839,25 +839,25 @@ 
define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_cbranch_execnz .LBB18_1 ; GFX9-NEXT: ; %bb.2: ; GFX9-NEXT: s_mov_b64 exec, s[54:55] +; GFX9-NEXT: v_readlane_b32 s30, v40, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-NEXT: v_readlane_b32 s65, v40, 17 -; GFX9-NEXT: v_readlane_b32 s64, v40, 16 -; GFX9-NEXT: v_readlane_b32 s55, v40, 15 -; GFX9-NEXT: v_readlane_b32 s54, v40, 14 -; GFX9-NEXT: v_readlane_b32 s53, v40, 13 -; GFX9-NEXT: v_readlane_b32 s52, v40, 12 -; GFX9-NEXT: v_readlane_b32 s51, v40, 11 -; GFX9-NEXT: v_readlane_b32 s50, v40, 10 -; GFX9-NEXT: v_readlane_b32 s49, v40, 9 -; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: v_readlane_b32 s65, v40, 15 +; GFX9-NEXT: v_readlane_b32 s64, v40, 14 +; GFX9-NEXT: v_readlane_b32 s55, v40, 13 +; GFX9-NEXT: v_readlane_b32 s54, v40, 12 +; GFX9-NEXT: v_readlane_b32 s53, v40, 11 +; GFX9-NEXT: v_readlane_b32 s52, v40, 10 +; GFX9-NEXT: v_readlane_b32 s51, v40, 9 +; GFX9-NEXT: v_readlane_b32 s50, v40, 8 +; GFX9-NEXT: v_readlane_b32 s49, v40, 7 +; GFX9-NEXT: v_readlane_b32 s48, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v40, 18 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir index 9d25df4738709..cfa0ee97e83d0 100644 --- 
a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -17,6 +17,8 @@ body: | ; CHECK-LABEL: name: spill_a64_kill ; CHECK: liveins: $agpr0_agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -42,6 +44,8 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub1_killed ; CHECK: liveins: $agpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -65,6 +69,8 @@ body: | ; CHECK-LABEL: name: spill_a64_undef_sub0_killed ; CHECK: liveins: $agpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1 @@ -84,7 +90,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_a32_undef - ; CHECK: S_ENDPGM 0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_ENDPGM 0 SI_SPILL_A32_SAVE undef $agpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) S_ENDPGM 0 ... @@ -101,7 +109,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_a64_undef - ; CHECK: S_ENDPGM 0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_ENDPGM 0 SI_SPILL_A64_SAVE undef $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index 3f6956b83ae92..d4241fb0c53f1 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -38,6 +38,12 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 @@ -82,6 +88,12 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 @@ -141,6 +153,12 @@ body: | ; GFX908-EXPANDED-NEXT: successors: 
%bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1 ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -181,6 +199,12 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1 ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; GFX90A-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 @@ -253,6 +277,9 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX908-EXPANDED-NEXT: {{ 
$}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX908-EXPANDED-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec ; GFX908-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) @@ -319,6 +346,9 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0 ; GFX90A-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GFX90A-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc @@ -402,6 +432,14 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; GFX908-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -444,6 +482,14 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; GFX90A-EXPANDED-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -503,6 +549,16 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -547,6 +603,16 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX90A-EXPANDED-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -608,6 +674,18 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX908-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -654,6 +732,18 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; GFX90A-EXPANDED-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -717,6 +807,20 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX908-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -765,6 +869,20 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; 
GFX90A-EXPANDED-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; GFX90A-EXPANDED-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -830,6 +948,24 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit 
$exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX908-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -882,6 +1018,24 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; GFX90A-EXPANDED-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -951,6 +1105,26 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; 
GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX908-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1005,6 +1179,26 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 ; 
GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 ; GFX90A-EXPANDED-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1076,6 +1270,28 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX908-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1132,6 +1348,28 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 ; GFX90A-EXPANDED-NEXT: $vgpr8 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1205,6 +1443,30 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX908-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1263,6 +1525,30 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 ; GFX90A-EXPANDED-NEXT: $vgpr9 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1338,6 +1624,32 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; 
GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX908-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX908-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1398,6 +1710,32 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; 
GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX90A-EXPANDED-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 ; GFX90A-EXPANDED-NEXT: $vgpr10 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1475,6 +1813,40 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX908-EXPANDED-NEXT: S_NOP 0, 
implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX908-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1543,6 +1915,40 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; GFX90A-EXPANDED-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1628,6 +2034,72 @@ body: | ; GFX908-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX908-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX908-EXPANDED-NEXT: {{ $}} + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; 
GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined 
$agpr16 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX908-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX908-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec @@ -1728,6 
+2200,72 @@ body: | ; GFX90A-EXPANDED-NEXT: successors: %bb.1(0x80000000) ; GFX90A-EXPANDED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; GFX90A-EXPANDED-NEXT: {{ $}} + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr18 + ; GFX90A-EXPANDED-NEXT: 
frame-setup CFI_INSTRUCTION undefined $vgpr19 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr20 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr21 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr22 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr23 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr24 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr25 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr26 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr27 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr28 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr29 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr30 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr8 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr9 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr10 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr11 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr12 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr13 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr14 + ; 
GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr15 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr16 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr17 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr18 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr19 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr20 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr21 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr22 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr23 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr24 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr25 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr30 + ; GFX90A-EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr31 ; GFX90A-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: $vgpr31 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; GFX90A-EXPANDED-NEXT: $vgpr30 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir index 24c631ce5e15f..7b3402494f39f 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-partial-csr-sgpr-live-ins.mir @@ -16,10 +16,15 @@ body: | ; CHECK: liveins: $sgpr50, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $vgpr63, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr50, $vgpr63, 0, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr52, 1, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr52, $vgpr63, 1, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr53, 2, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr53, $vgpr63, 2, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr54, 3, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr54, $vgpr63, 3, 32 ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr55, 4, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr55, $vgpr63, 4, 32 ; CHECK-NEXT: S_NOP 0, implicit $sgpr50 ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit $sgpr52 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index f4edafd9443ab..be5295cf2affd 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -22,8 +22,17 @@ 
body: | ; GCN-LABEL: name: spill_sgpr128_use_subreg ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF @@ -60,8 +69,16 @@ body: | ; GCN-LABEL: name: spill_sgpr128_use_kill ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, 
$sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF @@ -95,6 +112,10 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_subreg ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8 ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) @@ -125,6 +146,9 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_kill ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 
implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir index 85a615c3d8ae8..866ce8a0c0293 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir @@ -13,6 +13,7 @@ body: | ; CHECK: liveins: $sgpr50, $vgpr63 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr50, $vgpr63, 0, 32 ; CHECK-NEXT: S_NOP 0, implicit $sgpr50 ; CHECK-NEXT: $sgpr50 = S_MOV_B32 0 S_NOP 0, implicit $sgpr50 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir index fa3fd3bc6da5b..b0be5676e26a2 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -56,21 +56,37 @@ body: | ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr64, $vgpr63, 0, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr65, $vgpr63, 1, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr66, $vgpr63, 2, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr67, 3, $vgpr63 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION llvm_vector_registers $sgpr67, $vgpr63, 3, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr68, 4, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr68, $vgpr63, 4, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr69, 5, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr69, $vgpr63, 5, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr70, 6, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr70, $vgpr63, 6, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr71, 7, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr71, $vgpr63, 7, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr80, 8, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr80, $vgpr63, 8, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr81, 9, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr81, $vgpr63, 9, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr82, 10, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr82, $vgpr63, 10, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr83, 11, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr83, $vgpr63, 11, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr84, 12, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr84, $vgpr63, 12, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr85, 13, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr85, $vgpr63, 13, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr86, 14, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr86, $vgpr63, 14, 32 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr87, 15, $vgpr63 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_vector_registers $sgpr87, $vgpr63, 15, 32 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = 
SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir index 6e8a5126ca823..cfa09c149e4c6 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir @@ -21,6 +21,12 @@ body: | ; GCN-LABEL: name: shift_back_exec_copy_reserved_reg ; GCN: liveins: $sgpr30_sgpr31, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -58,6 +64,14 @@ body: | ; GCN-LABEL: name: spill_exec_copy_reserved_reg ; GCN: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr41 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 ; GCN-NEXT: $sgpr40_sgpr41 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir index 639bf6a6d550c..3531b3dd75792 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -40,6 +40,8 @@ body: | ; GFX9-LABEL: name: check_vcc ; GFX9: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX9-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX9-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX9-NEXT: $sgpr12 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $sgpr13 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 @@ -77,6 +79,8 @@ body: | ; GFX10-LABEL: name: check_vcc ; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX10-NEXT: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 @@ -114,6 +118,8 @@ body: | ; GFX11-LABEL: name: check_vcc ; GFX11: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec diff --git 
a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir index 52593e01eafde..da80320bc1af1 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir @@ -16,6 +16,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 @@ -41,6 +47,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 
0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -68,6 +80,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_3_of_4 ; GCN: liveins: $agpr28, $agpr29, $agpr30, $agpr31, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 ; GCN-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -97,6 
+115,16 @@ body: | ; GCN-LABEL: name: full_spill_v128 ; GCN: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -126,6 +154,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_1_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 @@ -151,6 +185,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_2_of_4 ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit 
killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -178,6 +218,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -207,6 +253,16 @@ body: | ; GCN-LABEL: name: full_spill_a128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll index bfadfd860edf6..94e5f936a35fd 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll @@ -14,15 +14,15 @@ define i32 @non_entry_func(i32 %x) { ; CHECK-NEXT: scratch_store_b32 off, v2, s32 offset:100 ; 4-byte Folded Spill ; CHECK-NEXT: s_wait_alu 0xfffe ; CHECK-NEXT: s_mov_b32 exec_lo, s0 -; CHECK-NEXT: v_writelane_b32 v2, s48, 0 ; CHECK-NEXT: s_mov_b32 m0, 0x110003 -; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Spill ; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4 ; CHECK-NEXT: s_mov_b32 m0, 1 -; CHECK-NEXT: v_writelane_b32 v2, s49, 1 ; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Spill ; CHECK-NEXT: scratch_store_block off, v[120:151], s32 +; CHECK-NEXT: v_writelane_b32 v2, s48, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_writelane_b32 v2, s49, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_nop ; CHECK-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir index 0c694d9f49e18..79a95cbf52391 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir +++ b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir @@ -16,6 +16,9 @@ body: | ; 
EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 @@ -62,6 +65,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 @@ -110,6 +116,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, 
implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll index e962d1bad9779..1184d1a94c3dc 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll @@ -142,8 +142,8 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { ; CHECK-NEXT: v_writelane_b32 v1, s99, 32 ; CHECK-NEXT: v_writelane_b32 v1, s100, 33 ; CHECK-NEXT: v_writelane_b32 v1, s101, 34 -; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: v_writelane_b32 v1, s102, 35 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.mir b/llvm/test/CodeGen/AMDGPU/spillv16.mir index 05569bf394c43..7be0bfa3e3fc8 100644 --- a/llvm/test/CodeGen/AMDGPU/spillv16.mir +++ b/llvm/test/CodeGen/AMDGPU/spillv16.mir @@ -32,6 +32,9 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; EXPANDED-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, addrspace 5) ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5) diff --git 
a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 109c7d638f924..dabdc95b73fa5 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -13,6 +13,8 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp0: ; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5 @@ -35,6 +37,12 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 2560 +; GCN-NEXT: .cfi_undefined 2561 +; GCN-NEXT: .cfi_undefined 2562 +; GCN-NEXT: .cfi_undefined 2563 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp2: ; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17 @@ -65,6 +73,8 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 
1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp8: ; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5 @@ -83,6 +93,8 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0 ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp10: ; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5 @@ -103,6 +115,8 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp12: ; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5 @@ -121,6 +135,8 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg ! 
; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp14: ; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5 @@ -139,6 +155,8 @@ define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned ; GCN-NEXT: ; %bb.0: ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1 ; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0 +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: .Ltmp16: ; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5 diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 9cb22dad86b88..d57a9ca42efa5 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -32,7 +32,6 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 { ; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 144 %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 @@ -46,6 +45,8 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x3c0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffffc00 +; GCN-NEXT: s_mov_b32 s5, 
s34 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -58,8 +59,6 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v1, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 3 -; GCN-NEXT: s_mov_b32 s5, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x2800 ; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -71,7 +70,6 @@ define void @needs_align16_stack_align4(i32 %idx) #2 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 160 %alloca.align16 = alloca [8 x <4 x i32>], align 16, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> , ptr addrspace(5) %gep0, align 16 @@ -86,6 +84,8 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x7c0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff800 +; GCN-NEXT: s_mov_b32 s5, s34 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -98,8 +98,6 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v1, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 3 -; GCN-NEXT: s_mov_b32 s5, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_addk_i32 s32, 0x3000 ; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -111,7 +109,6 @@ define void @needs_align32(i32 %idx) #0 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 192 %alloca.align16 = alloca [8 x <4 x i32>], align 32, addrspace(5) %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile <4 x i32> 
, ptr addrspace(5) %gep0, align 32 @@ -125,10 +122,10 @@ define void @force_realign4(i32 %idx) #1 { ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffff00 -; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GCN-NEXT: s_mov_b32 s5, s34 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GCN-NEXT: s_addk_i32 s32, 0xd00 ; GCN-NEXT: v_add_u32_e32 v0, vcc, v0, v1 ; GCN-NEXT: v_mov_b32_e32 v1, 3 @@ -138,7 +135,6 @@ define void @force_realign4(i32 %idx) #1 { ; GCN-NEXT: s_mov_b32 s34, s5 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 s[30:31] -; GCN: ; ScratchSize: 52 %alloca.align16 = alloca [8 x i32], align 4, addrspace(5) %gep0 = getelementptr inbounds [8 x i32], ptr addrspace(5) %alloca.align16, i32 0, i32 %idx store volatile i32 3, ptr addrspace(5) %gep0, align 4 @@ -295,28 +291,28 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:1028 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 2 -; GCN-NEXT: v_mov_b32_e32 v32, 0 ; GCN-NEXT: v_writelane_b32 v40, s34, 3 ; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: s_add_i32 s32, s32, 0x30000 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_mov_b32_e32 v32, 0 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:1024 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s34 ; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s34 offset:4 -; GCN-NEXT: s_add_i32 s32, s32, 0x30000 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12 ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; 
GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s4, v40, 2 ; GCN-NEXT: v_readlane_b32 s34, v40, 3 @@ -346,8 +342,8 @@ define i32 @needs_align1024_stack_args_used_inside_loop(ptr addrspace(5) nocaptu ; GCN-NEXT: s_mov_b32 s11, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0xffc0 ; GCN-NEXT: s_mov_b32 s14, s34 -; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000 +; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s34 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s10, 0 @@ -416,12 +412,12 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 { ; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s41, s34 -; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_mov_b32 s40, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 +; GCN-NEXT: s_mov_b32 s41, s34 +; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: s_mov_b32 s34, s41 @@ -457,7 +453,7 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: v_writelane_b32 v39, s4, 32 ; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 ; GCN-NEXT: v_writelane_b32 v39, s48, 1 ; GCN-NEXT: v_writelane_b32 v39, s49, 2 @@ -489,8 +485,8 
@@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: v_writelane_b32 v39, s99, 28 ; GCN-NEXT: v_writelane_b32 v39, s100, 29 ; GCN-NEXT: v_writelane_b32 v39, s101, 30 -; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: v_writelane_b32 v39, s102, 31 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -580,7 +576,7 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: v_writelane_b32 v39, s4, 32 ; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: s_add_i32 s32, s32, 0x46000 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 ; GCN-NEXT: v_writelane_b32 v39, s48, 1 ; GCN-NEXT: v_writelane_b32 v39, s49, 2 @@ -612,9 +608,9 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: v_writelane_b32 v39, s99, 28 ; GCN-NEXT: v_writelane_b32 v39, s100, 29 ; GCN-NEXT: v_writelane_b32 v39, s101, 30 -; GCN-NEXT: v_mov_b32_e32 v1, 0x1080 -; GCN-NEXT: s_add_i32 s32, s32, 0x46000 ; GCN-NEXT: v_writelane_b32 v39, s102, 31 +; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: v_mov_b32_e32 v1, 0x1080 ; GCN-NEXT: s_mov_b32 s32, s34 ; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index d2394bab82c77..70bcb99e05777 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -1270,24 +1270,24 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; WAVE32-OPT-NEXT: s_mov_b32 exec_lo, s16 ; WAVE32-OPT-NEXT: v_writelane_b32 v32, s30, 0 +; WAVE32-OPT-NEXT: 
s_addk_i32 s32, 0x1200 +; WAVE32-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE32-OPT-NEXT: v_mov_b32_e32 v1, 17 -; WAVE32-OPT-NEXT: s_addk_i32 s32, 0x1200 -; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE32-OPT-NEXT: s_mov_b32 s18, s32 +; WAVE32-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE32-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo -; WAVE32-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-OPT-NEXT: s_lshr_b32 s19, s18, 5 ; WAVE32-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE32-OPT-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE32-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; WAVE32-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE32-OPT-NEXT: s_mov_b32 s32, s18 +; WAVE32-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE32-OPT-NEXT: ;;#ASMSTART ; WAVE32-OPT-NEXT: ; use s19 ; WAVE32-OPT-NEXT: ;;#ASMEND ; WAVE32-OPT-NEXT: v_readlane_b32 s31, v32, 1 -; WAVE32-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE32-OPT-NEXT: s_mov_b32 s32, s33 ; WAVE32-OPT-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1305,24 +1305,24 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-OPT-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill ; WAVE64-OPT-NEXT: s_mov_b64 exec, s[16:17] ; WAVE64-OPT-NEXT: v_writelane_b32 v32, s30, 0 +; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400 +; WAVE64-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v0, 42 ; WAVE64-OPT-NEXT: v_mov_b32_e32 v1, 17 -; WAVE64-OPT-NEXT: s_addk_i32 s32, 0x2400 -; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE64-OPT-NEXT: s_mov_b32 s18, s32 +; WAVE64-OPT-NEXT: s_mov_b32 s17, stack_passed_argument@abs32@hi ; WAVE64-OPT-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo -; WAVE64-OPT-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-OPT-NEXT: s_lshr_b32 s19, s18, 
6 ; WAVE64-OPT-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE64-OPT-NEXT: s_waitcnt_vscnt null, 0x0 ; WAVE64-OPT-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; WAVE64-OPT-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE64-OPT-NEXT: s_mov_b32 s32, s18 +; WAVE64-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE64-OPT-NEXT: ;;#ASMSTART ; WAVE64-OPT-NEXT: ; use s19 ; WAVE64-OPT-NEXT: ;;#ASMEND ; WAVE64-OPT-NEXT: v_readlane_b32 s31, v32, 1 -; WAVE64-OPT-NEXT: v_readlane_b32 s30, v32, 0 ; WAVE64-OPT-NEXT: s_mov_b32 s32, s33 ; WAVE64-OPT-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-OPT-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1431,8 +1431,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: ; use s5 ; WAVE32-O0-NEXT: ;;#ASMEND ; WAVE32-O0-NEXT: s_mov_b32 s32, s4 -; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE32-O0-NEXT: v_readlane_b32 s30, v32, 0 +; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE32-O0-NEXT: s_mov_b32 s32, s33 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1542,8 +1542,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: ; use s5 ; WAVE64-O0-NEXT: ;;#ASMEND ; WAVE64-O0-NEXT: s_mov_b32 s32, s4 -; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE64-O0-NEXT: v_readlane_b32 s30, v32, 0 +; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE64-O0-NEXT: s_mov_b32 s32, s33 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload @@ -1653,8 +1653,8 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: ; use s5 ; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4 -; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0 +; 
WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s33 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll index ebd4bc881f2af..249d2dd85243b 100644 --- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll +++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll @@ -184,18 +184,18 @@ define void @outgoing_f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] -; GFX7-NEXT: flat_load_ushort v0, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, f16_user@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, f16_user@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: flat_load_ushort v0, v[0:1] +; GFX7-NEXT: s_mov_b32 s17, f16_user@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, f16_user@abs32@lo ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -218,20 +218,20 @@ define void @outgoing_v2f16_arg(ptr %ptr) #0 { ; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] -; GFX7-NEXT: flat_load_dword v1, v[0:1] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v2f16_user@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, 
v2f16_user@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: flat_load_dword v1, v[0:1] +; GFX7-NEXT: s_mov_b32 s17, v2f16_user@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v2f16_user@abs32@lo ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v1 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: v_readlane_b32 s30, v40, 0 +; GFX7-NEXT: v_readlane_b32 s31, v40, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -255,19 +255,19 @@ define void @outgoing_f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 @@ -297,20 +297,20 @@ define void @outgoing_v2f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, 
s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v2f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v2f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v2f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v2f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -345,13 +345,13 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v4f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v4f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v4f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v4f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -381,8 +381,8 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 { ; GFX7-NEXT: flat_store_dword v[40:41], v4 
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v42, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -406,13 +406,13 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 { ; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v42, s16, 2 -; GFX7-NEXT: v_writelane_b32 v42, s30, 0 -; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX7-NEXT: v_writelane_b32 v42, s30, 0 ; GFX7-NEXT: v_writelane_b32 v42, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: v_mov_b32_e32 v41, v1 ; GFX7-NEXT: v_mov_b32_e32 v40, v0 ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -468,8 +468,8 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 { ; GFX7-NEXT: flat_store_dword v[40:41], v8 ; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: v_readlane_b32 s30, v42, 0 +; GFX7-NEXT: v_readlane_b32 s31, v42, 1 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v42, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -494,10 +494,10 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: s_mov_b64 exec, s[18:19] ; GFX7-NEXT: v_writelane_b32 v40, s16, 2 ; GFX7-NEXT: v_writelane_b32 v40, s30, 0 -; GFX7-NEXT: 
s_mov_b32 s17, v8f16_result@abs32@hi -; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_addk_i32 s32, 0x400 ; GFX7-NEXT: v_writelane_b32 v40, s31, 1 +; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi +; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo ; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 @@ -518,6 +518,7 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_readlane_b32 s30, v40, 0 ; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 ; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 @@ -527,7 +528,6 @@ define half @call_split_type_used_outside_block_v8f16() #0 { ; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: v_readlane_b32 s31, v40, 1 -; GFX7-NEXT: v_readlane_b32 s30, v40, 0 ; GFX7-NEXT: s_mov_b32 s32, s33 ; GFX7-NEXT: v_readlane_b32 s4, v40, 2 ; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll index c4af66e922e8d..42dc23a55a6dc 100644 --- a/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll +++ b/llvm/test/CodeGen/AMDGPU/swdev504645-global-fold.ll @@ -10,19 +10,20 @@ define void @test_load_zext() { ; CHECK-NEXT: s_or_saveexec_b64 s[2:3], -1 ; CHECK-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[2:3] -; CHECK-NEXT: s_add_i32 s32, s32, 16 ; CHECK-NEXT: v_writelane_b32 v40, s0, 2 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_add_i32 s32, s32, 16 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: s_add_u32 s0, s0, has_spgr_args@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s1, s1, has_spgr_args@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x0 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: s_mov_b32 s0, DescriptorBuffer@abs32@lo -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[2:3] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s0, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[2:3], -1 diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll index 242b5e9aeaf42..dab830f8fb286 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll @@ -18,16 +18,16 @@ define void @tail_call_i32_inreg_divergent(i32 %vgpr) { ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, void_func_i32_inreg@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, void_func_i32_inreg@rel32@hi+12 ; CHECK-NEXT: ; illegal copy v0 to s0 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -51,19 +51,19 @@ define void @indirect_tail_call_i32_inreg_divergent(i32 %vgpr) { ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v40, s16, 2 
+; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: s_addk_i32 s32, 0x400 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, constant@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, constant@rel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: ; illegal copy v0 to s0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_mov_b32 s32, s33 ; CHECK-NEXT: v_readlane_b32 s4, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir index cc261b0da4a8f..f4dc2aeb3e848 100644 --- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir +++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir @@ -19,8 +19,12 @@ body: | ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 @@ -52,9 +56,15 @@ body: | ; 
GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 256 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF @@ -91,8 +101,13 @@ body: | ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 @@ 
-123,8 +138,14 @@ body: | ; GCN-LABEL: name: wwm_csr_spill_reload ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 0 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir index 4122a530ee861..5b330e892aa34 100644 --- a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir +++ b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir @@ -18,6 +18,9 @@ body: | ; GCN-LABEL: name: vgpr_use_after_prolog_spill ; GCN: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 @@ -42,6 +45,9 @@ body: | ; GCN-LABEL: name: livein_vgpr_def_after_prolog_spill ; GCN: liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 @@ -65,6 +71,9 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index 0cf26be3ac24f..42386385a8016 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -7,6 +7,8 @@ define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 { ; GCN: bb.0.entry: ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 entry: ret float %a @@ -18,6 +20,8 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; GCN-NEXT: liveins: $sgpr2, $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; GCN-NEXT: {{ $}} @@ 
-51,6 +55,8 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; GCN-NEXT: {{ $}} @@ -103,6 +109,8 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(floa ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN-NEXT: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index d80ec6bd34945..4fae53f06f4f2 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -13,14 +13,14 @@ define internal fastcc void @widget() { ; GFX90A-NEXT: s_or_saveexec_b64 s[18:19], -1 ; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[18:19] -; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_getpc_b64 s[16:17] ; GFX90A-NEXT: s_add_u32 s16, s16, wobble@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s17, s17, wobble@gotpcrel32@hi+12 ; GFX90A-NEXT: 
s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 -; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] bb: diff --git a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll index 321b64510c35f..c871293de7436 100644 --- a/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AMDGPU/unfold-masked-merge-scalar-variablemask.ll @@ -648,27 +648,26 @@ define i32 @s_in_multiuse_A(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_mov_b32 exec_lo, s16 ; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s30, 2 +; GCN-NEXT: v_writelane_b32 v40, s31, 3 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 ; GCN-NEXT: s_xor_b32 s0, s0, s1 ; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: s_mov_b32 s34, s1 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: s_and_b32 s35, s0, s3 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-NEXT: s_mov_b32 s34, s1 ; GCN-NEXT: v_mov_b32_e32 v0, s35 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_xor_b32 s0, s35, s34 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s30, v40, 2 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 3 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; 
GCN-NEXT: v_readlane_b32 s0, v40, 4 ; GCN-NEXT: s_or_saveexec_b32 s1, -1 @@ -693,29 +692,28 @@ define i32 @s_in_multiuse_B(i32 inreg %x, i32 inreg %y, i32 inreg %z, i32 inreg ; GCN-NEXT: s_or_saveexec_b32 s16, -1 ; GCN-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b32 exec_lo, s16 +; GCN-NEXT: v_writelane_b32 v40, s2, 4 ; GCN-NEXT: s_add_i32 s32, s32, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s30, 2 +; GCN-NEXT: v_writelane_b32 v40, s31, 3 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, use32@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, use32@gotpcrel32@hi+12 -; GCN-NEXT: v_writelane_b32 v40, s2, 4 -; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GCN-NEXT: s_xor_b32 s0, s0, s1 -; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GCN-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 ; GCN-NEXT: s_mov_b32 s34, s1 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: s_and_b32 s35, s0, s3 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_xor_b32 s0, s35, s34 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s30, v40, 2 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s31, v40, 3 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s0, v40, 4 ; GCN-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll b/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll index a81d9a458e23a..a82453ee23ee9 100644 --- 
a/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/unspill-vgpr-after-rewrite-vgpr-mfma.ll @@ -8,10 +8,6 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg, ; CHECK-LABEL: eliminate_spill_after_mfma_rewrite: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 -; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -60,6 +56,11 @@ define void @eliminate_spill_after_mfma_rewrite(i32 %x, i32 %y, <4 x i32> %arg, ; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 +; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 +; CHECK-NEXT: s_nop 1 ; CHECK-NEXT: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[0:3] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def v[32:63], v[0:31] @@ -212,10 +213,6 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar ; CHECK-LABEL: eliminate_spill_after_mfma_rewrite_x2: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 -; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 -; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 
; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill @@ -264,6 +261,11 @@ define void @eliminate_spill_after_mfma_rewrite_x2(i32 %x, i32 %y, <4 x i32> %ar ; CHECK-NEXT: buffer_store_dword a61, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword a63, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: v_accvgpr_write_b32 a3, v5 +; CHECK-NEXT: v_accvgpr_write_b32 a2, v4 +; CHECK-NEXT: v_accvgpr_write_b32 a1, v3 +; CHECK-NEXT: v_accvgpr_write_b32 a0, v2 +; CHECK-NEXT: s_nop 1 ; CHECK-NEXT: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[0:3] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def v[32:63], v[0:31] diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 25e8581fb6cdd..639dcdcbf1c2a 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -14,22 +14,22 @@ define hidden void @widget() { ; GCN-NEXT: v_writelane_b32 v41, s16, 16 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v41, s30, 0 -; GCN-NEXT: v_writelane_b32 v41, s31, 1 -; GCN-NEXT: v_writelane_b32 v41, s34, 2 -; GCN-NEXT: v_writelane_b32 v41, s35, 3 -; GCN-NEXT: v_writelane_b32 v41, s36, 4 -; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s48, 8 -; GCN-NEXT: v_writelane_b32 v41, s49, 9 -; GCN-NEXT: v_writelane_b32 v41, s50, 10 -; GCN-NEXT: v_writelane_b32 v41, s51, 11 -; GCN-NEXT: v_writelane_b32 v41, s52, 12 -; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s54, 14 -; GCN-NEXT: v_writelane_b32 v41, s55, 15 +; GCN-NEXT: v_writelane_b32 v41, s34, 0 +; GCN-NEXT: 
v_writelane_b32 v41, s35, 1 +; GCN-NEXT: v_writelane_b32 v41, s36, 2 +; GCN-NEXT: v_writelane_b32 v41, s37, 3 +; GCN-NEXT: v_writelane_b32 v41, s38, 4 +; GCN-NEXT: v_writelane_b32 v41, s39, 5 +; GCN-NEXT: v_writelane_b32 v41, s48, 6 +; GCN-NEXT: v_writelane_b32 v41, s49, 7 +; GCN-NEXT: v_writelane_b32 v41, s50, 8 +; GCN-NEXT: v_writelane_b32 v41, s51, 9 +; GCN-NEXT: v_writelane_b32 v41, s52, 10 +; GCN-NEXT: v_writelane_b32 v41, s53, 11 +; GCN-NEXT: v_writelane_b32 v41, s54, 12 +; GCN-NEXT: v_writelane_b32 v41, s55, 13 +; GCN-NEXT: v_writelane_b32 v41, s30, 14 +; GCN-NEXT: v_writelane_b32 v41, s31, 15 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] @@ -93,22 +93,22 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s55, v41, 15 -; GCN-NEXT: v_readlane_b32 s54, v41, 14 -; GCN-NEXT: v_readlane_b32 s53, v41, 13 -; GCN-NEXT: v_readlane_b32 s52, v41, 12 -; GCN-NEXT: v_readlane_b32 s51, v41, 11 -; GCN-NEXT: v_readlane_b32 s50, v41, 10 -; GCN-NEXT: v_readlane_b32 s49, v41, 9 -; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 -; GCN-NEXT: v_readlane_b32 s37, v41, 5 -; GCN-NEXT: v_readlane_b32 s36, v41, 4 -; GCN-NEXT: v_readlane_b32 s35, v41, 3 -; GCN-NEXT: v_readlane_b32 s34, v41, 2 -; GCN-NEXT: v_readlane_b32 s31, v41, 1 -; GCN-NEXT: v_readlane_b32 s30, v41, 0 +; GCN-NEXT: v_readlane_b32 s30, v41, 14 +; GCN-NEXT: v_readlane_b32 s31, v41, 15 +; GCN-NEXT: v_readlane_b32 s55, v41, 13 +; GCN-NEXT: v_readlane_b32 s54, v41, 12 +; GCN-NEXT: v_readlane_b32 s53, v41, 11 +; GCN-NEXT: v_readlane_b32 s52, v41, 10 +; GCN-NEXT: v_readlane_b32 s51, v41, 9 +; GCN-NEXT: v_readlane_b32 s50, v41, 8 +; GCN-NEXT: v_readlane_b32 s49, v41, 7 +; GCN-NEXT: v_readlane_b32 s48, v41, 6 +; GCN-NEXT: v_readlane_b32 s39, v41, 5 +; 
GCN-NEXT: v_readlane_b32 s38, v41, 4 +; GCN-NEXT: v_readlane_b32 s37, v41, 3 +; GCN-NEXT: v_readlane_b32 s36, v41, 2 +; GCN-NEXT: v_readlane_b32 s35, v41, 1 +; GCN-NEXT: v_readlane_b32 s34, v41, 0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v41, 16 @@ -266,32 +266,32 @@ define hidden void @blam() { ; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v45, s30, 0 -; GCN-NEXT: v_writelane_b32 v45, s31, 1 -; GCN-NEXT: v_writelane_b32 v45, s34, 2 -; GCN-NEXT: v_writelane_b32 v45, s35, 3 -; GCN-NEXT: v_writelane_b32 v45, s36, 4 -; GCN-NEXT: v_writelane_b32 v45, s37, 5 -; GCN-NEXT: v_writelane_b32 v45, s38, 6 -; GCN-NEXT: v_writelane_b32 v45, s39, 7 -; GCN-NEXT: v_writelane_b32 v45, s48, 8 -; GCN-NEXT: v_writelane_b32 v45, s49, 9 -; GCN-NEXT: v_writelane_b32 v45, s50, 10 -; GCN-NEXT: v_writelane_b32 v45, s51, 11 -; GCN-NEXT: v_writelane_b32 v45, s52, 12 -; GCN-NEXT: v_writelane_b32 v45, s53, 13 -; GCN-NEXT: v_writelane_b32 v45, s54, 14 -; GCN-NEXT: v_writelane_b32 v45, s55, 15 -; GCN-NEXT: v_writelane_b32 v45, s64, 16 -; GCN-NEXT: v_writelane_b32 v45, s65, 17 -; GCN-NEXT: v_writelane_b32 v45, s66, 18 -; GCN-NEXT: v_writelane_b32 v45, s67, 19 -; GCN-NEXT: v_writelane_b32 v45, s68, 20 -; GCN-NEXT: v_writelane_b32 v45, s69, 21 -; GCN-NEXT: v_writelane_b32 v45, s70, 22 -; GCN-NEXT: v_writelane_b32 v45, s71, 23 -; GCN-NEXT: v_writelane_b32 v45, s80, 24 -; GCN-NEXT: v_writelane_b32 v45, s81, 25 +; GCN-NEXT: v_writelane_b32 v45, s34, 0 +; GCN-NEXT: v_writelane_b32 v45, s35, 1 +; GCN-NEXT: v_writelane_b32 v45, s36, 2 +; GCN-NEXT: v_writelane_b32 v45, s37, 3 +; GCN-NEXT: v_writelane_b32 v45, s38, 4 +; GCN-NEXT: v_writelane_b32 v45, s39, 5 +; GCN-NEXT: v_writelane_b32 v45, 
s48, 6 +; GCN-NEXT: v_writelane_b32 v45, s49, 7 +; GCN-NEXT: v_writelane_b32 v45, s50, 8 +; GCN-NEXT: v_writelane_b32 v45, s51, 9 +; GCN-NEXT: v_writelane_b32 v45, s52, 10 +; GCN-NEXT: v_writelane_b32 v45, s53, 11 +; GCN-NEXT: v_writelane_b32 v45, s54, 12 +; GCN-NEXT: v_writelane_b32 v45, s55, 13 +; GCN-NEXT: v_writelane_b32 v45, s64, 14 +; GCN-NEXT: v_writelane_b32 v45, s65, 15 +; GCN-NEXT: v_writelane_b32 v45, s66, 16 +; GCN-NEXT: v_writelane_b32 v45, s67, 17 +; GCN-NEXT: v_writelane_b32 v45, s68, 18 +; GCN-NEXT: v_writelane_b32 v45, s69, 19 +; GCN-NEXT: v_writelane_b32 v45, s70, 20 +; GCN-NEXT: v_writelane_b32 v45, s71, 21 +; GCN-NEXT: v_writelane_b32 v45, s80, 22 +; GCN-NEXT: v_writelane_b32 v45, s81, 23 +; GCN-NEXT: v_writelane_b32 v45, s30, 24 +; GCN-NEXT: v_writelane_b32 v45, s31, 25 ; GCN-NEXT: v_mov_b32_e32 v40, v31 ; GCN-NEXT: s_mov_b32 s54, s15 ; GCN-NEXT: s_mov_b32 s55, s14 @@ -427,32 +427,32 @@ define hidden void @blam() { ; GCN-NEXT: s_branch .LBB1_1 ; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock ; GCN-NEXT: s_or_b64 exec, exec, s[66:67] -; GCN-NEXT: v_readlane_b32 s81, v45, 25 -; GCN-NEXT: v_readlane_b32 s80, v45, 24 -; GCN-NEXT: v_readlane_b32 s71, v45, 23 -; GCN-NEXT: v_readlane_b32 s70, v45, 22 -; GCN-NEXT: v_readlane_b32 s69, v45, 21 -; GCN-NEXT: v_readlane_b32 s68, v45, 20 -; GCN-NEXT: v_readlane_b32 s67, v45, 19 -; GCN-NEXT: v_readlane_b32 s66, v45, 18 -; GCN-NEXT: v_readlane_b32 s65, v45, 17 -; GCN-NEXT: v_readlane_b32 s64, v45, 16 -; GCN-NEXT: v_readlane_b32 s55, v45, 15 -; GCN-NEXT: v_readlane_b32 s54, v45, 14 -; GCN-NEXT: v_readlane_b32 s53, v45, 13 -; GCN-NEXT: v_readlane_b32 s52, v45, 12 -; GCN-NEXT: v_readlane_b32 s51, v45, 11 -; GCN-NEXT: v_readlane_b32 s50, v45, 10 -; GCN-NEXT: v_readlane_b32 s49, v45, 9 -; GCN-NEXT: v_readlane_b32 s48, v45, 8 -; GCN-NEXT: v_readlane_b32 s39, v45, 7 -; GCN-NEXT: v_readlane_b32 s38, v45, 6 -; GCN-NEXT: v_readlane_b32 s37, v45, 5 -; GCN-NEXT: v_readlane_b32 s36, v45, 4 -; GCN-NEXT: v_readlane_b32 s35, v45, 3 
-; GCN-NEXT: v_readlane_b32 s34, v45, 2 -; GCN-NEXT: v_readlane_b32 s31, v45, 1 -; GCN-NEXT: v_readlane_b32 s30, v45, 0 +; GCN-NEXT: v_readlane_b32 s30, v45, 24 +; GCN-NEXT: v_readlane_b32 s31, v45, 25 +; GCN-NEXT: v_readlane_b32 s81, v45, 23 +; GCN-NEXT: v_readlane_b32 s80, v45, 22 +; GCN-NEXT: v_readlane_b32 s71, v45, 21 +; GCN-NEXT: v_readlane_b32 s70, v45, 20 +; GCN-NEXT: v_readlane_b32 s69, v45, 19 +; GCN-NEXT: v_readlane_b32 s68, v45, 18 +; GCN-NEXT: v_readlane_b32 s67, v45, 17 +; GCN-NEXT: v_readlane_b32 s66, v45, 16 +; GCN-NEXT: v_readlane_b32 s65, v45, 15 +; GCN-NEXT: v_readlane_b32 s64, v45, 14 +; GCN-NEXT: v_readlane_b32 s55, v45, 13 +; GCN-NEXT: v_readlane_b32 s54, v45, 12 +; GCN-NEXT: v_readlane_b32 s53, v45, 11 +; GCN-NEXT: v_readlane_b32 s52, v45, 10 +; GCN-NEXT: v_readlane_b32 s51, v45, 9 +; GCN-NEXT: v_readlane_b32 s50, v45, 8 +; GCN-NEXT: v_readlane_b32 s49, v45, 7 +; GCN-NEXT: v_readlane_b32 s48, v45, 6 +; GCN-NEXT: v_readlane_b32 s39, v45, 5 +; GCN-NEXT: v_readlane_b32 s38, v45, 4 +; GCN-NEXT: v_readlane_b32 s37, v45, 3 +; GCN-NEXT: v_readlane_b32 s36, v45, 2 +; GCN-NEXT: v_readlane_b32 s35, v45, 1 +; GCN-NEXT: v_readlane_b32 s34, v45, 0 ; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir index 1e815f76ee149..dd7d96f9d6e3c 100644 --- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -39,11 +39,43 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc ; 
MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION def_cfa_register $sgpr33 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -66,6 +98,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_register $sgpr32 ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0 ; @@ -74,11 +107,43 @@ body: | ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr33, $sgpr40 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION register $sgpr34, $sgpr41 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 + ; FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x41, 0x94, 
0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -104,6 +169,7 @@ body: | ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir index 2fac3d29cb0dc..613963403cc67 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir +++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir @@ -19,6 +19,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_1_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51 @@ -44,6 +50,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_2_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 ; GCN-NEXT: 
$vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -71,6 +83,12 @@ body: | ; GCN-LABEL: name: partial_spill_a128_restore_to_v128_3_of_4 ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr48 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr50 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr51 ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -100,6 +118,20 @@ body: | ; GCN-LABEL: name: full_spill_a128_restore_to_v128 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup 
CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr52 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr53 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr54 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr55 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 ; GCN-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 @@ -129,6 +161,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_1_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; 
GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29 @@ -154,6 +192,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_2_of_4 ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5) @@ -181,6 +225,12 @@ body: | ; GCN-LABEL: name: partial_spill_v128_restore_to_a128_3_of_4 ; GCN: liveins: $agpr24, $agpr25, $agpr30, $agpr31, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr26 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr27 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr28 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr29 ; GCN-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 @@ -210,6 +260,20 @@ body: | ; GCN-LABEL: name: full_spill_v128_restore_to_a128 ; GCN: liveins: $agpr4, $agpr5, $agpr6, $agpr7, $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr3 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr4 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr5 + ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr6 
+ ; GCN-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr7 ; GCN-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 ; GCN-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir index 572a875941b22..00c0f230d141a 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -26,6 +26,8 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) @@ -44,6 +46,8 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec @@ -63,6 +67,8 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: 
liveins: $vgpr0 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) @@ -81,6 +87,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) @@ -125,6 +133,8 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5) @@ -144,6 +154,8 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX9-FLATSCR-NEXT: {{ $}} + ; 
GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec @@ -163,6 +175,8 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5) @@ -181,6 +195,8 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5) @@ -224,6 +240,9 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 
0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -241,6 +260,9 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec @@ -259,6 +281,9 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) @@ -276,6 +301,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -318,6 +346,10 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5) @@ -336,6 +368,10 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec ; GFX9-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec @@ -354,6 +390,10 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: 
%bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec ; GFX10-FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5) @@ -371,6 +411,10 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5) @@ -415,6 +459,71 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 
8200, implicit $exec @@ -436,6 +545,71 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -458,6 +632,71 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit 
$exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -479,6 +718,71 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -527,6 +831,72 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -549,6 +919,72 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $vgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -571,6 +1007,72 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -592,6 +1094,72 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + 
; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + 
; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -641,6 +1209,73 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -664,6 +1299,73 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -686,6 +1388,73 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -707,6 +1476,73 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -757,6 +1593,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -778,6 +1678,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr 
:: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -800,6 +1764,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -821,6 +1849,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -869,6 +1961,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -891,6 +2047,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -913,6 +2133,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -934,6 +2218,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr2 = V_MOV_B32_e32 8200, implicit $exec @@ -983,6 +2331,70 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -1006,6 +2418,70 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, 
$sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec @@ -1028,6 +2504,70 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION 
undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec @@ -1049,6 +2589,70 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr3 = V_MOV_B32_e32 8200, implicit $exec @@ -1098,6 +2702,71 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -1115,6 +2784,71 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + 
; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + 
; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1135,6 +2869,71 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; 
GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; 
GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1155,6 +2954,71 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; 
VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) @@ -1202,6 +3066,71 @@ body: | ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION 
undefined $vgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; 
MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 
0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1223,6 +3152,71 @@ body: | ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX9-FLATSCR-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr20 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX9-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc @@ -1247,6 +3241,71 @@ body: | ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; GFX10-FLATSCR-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$sgpr43 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr76 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; GFX10-FLATSCR-NEXT: 
S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc @@ -1271,6 +3330,71 @@ body: | ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr2 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr4 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr5 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr7 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr8 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr9 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr10 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr11 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr12 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr13 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr14 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr15 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr16 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr17 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr18 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr19 + ; VMEM-GFX8-NEXT: 
frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr21 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr22 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr23 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr24 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr25 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr26 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr27 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr28 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr29 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr41 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr42 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr43 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr44 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr45 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr46 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr47 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr56 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr57 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr58 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr59 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr60 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr61 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr62 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr63 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr72 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr73 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr74 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr75 + ; VMEM-GFX8-NEXT: frame-setup 
CFI_INSTRUCTION undefined $sgpr76 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr77 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr78 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr79 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr88 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr89 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr90 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr91 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr92 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr93 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr94 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr95 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec @@ -1319,6 +3443,9 @@ body: | ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; MUBUF-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; MUBUF-NEXT: $vcc_lo = S_MOV_B32 8200 @@ -1339,6 +3466,9 @@ body: | ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x36, 0x24, 0x36, 0xe9, 0x02 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX9-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined 
$vgpr0 ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX9-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1360,6 +3490,9 @@ body: | ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GFX10-FLATSCR-NEXT: {{ $}} + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; GFX10-FLATSCR-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc ; GFX10-FLATSCR-NEXT: S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc @@ -1381,6 +3514,9 @@ body: | ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; VMEM-GFX8-NEXT: {{ $}} + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; VMEM-GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; VMEM-GFX8-NEXT: $vcc_lo = S_MOV_B32 8200 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir index edea344a66a3c..8862c17f8e7a5 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -17,6 +17,8 @@ body: | ; CHECK-LABEL: name: spill_v32 ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit $vgpr0 SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) @@ -39,6 +41,8 @@ body: | ; CHECK-LABEL: name: spill_v32_kill ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ... @@ -59,6 +63,8 @@ body: | ; CHECK-LABEL: name: spill_v64 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit $vgpr0_vgpr1 @@ -82,6 +88,8 @@ body: | ; CHECK-LABEL: name: spill_v64_kill ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, 
addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -105,6 +113,8 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub1_killed ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -126,6 +136,8 @@ body: | ; CHECK-LABEL: name: spill_v64_undef_sub0_killed ; CHECK: liveins: $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) @@ -147,6 +159,8 @@ body: | ; 
CHECK-LABEL: name: spill_v128_kill ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5) ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5) @@ -166,7 +180,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_v32_undef - ; CHECK: S_NOP 0, implicit undef $vgpr0 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_NOP 0, implicit undef $vgpr0 SI_SPILL_V32_SAVE undef $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) S_NOP 0, implicit undef $vgpr0 ... @@ -183,7 +199,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: spill_v64_undef - ; CHECK: S_NOP 0, implicit undef $vgpr0_vgpr1 + ; CHECK: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32, 0, 6 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: S_NOP 0, implicit undef $vgpr0_vgpr1 SI_SPILL_V64_SAVE undef $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) S_NOP 0, implicit undef $vgpr0_vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 14f222a8c8e17..6be261c2ecb5a 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -16,15 +16,19 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v44, s4, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v44, s30, 0 +; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v36, v16 ; GFX9-NEXT: v_mov_b32_e32 v35, v15 ; GFX9-NEXT: v_mov_b32_e32 v34, v14 ; GFX9-NEXT: v_mov_b32_e32 v33, v13 ; GFX9-NEXT: v_mov_b32_e32 v32, v12 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: ;;#ASMSTART @@ -34,14 +38,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v44, s4, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, 
extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v44, s30, 0 -; GFX9-NEXT: v_writelane_b32 v44, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_mov_b32_e32 v0, v40 @@ -52,8 +52,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: v_readlane_b32 s30, v44, 0 +; GFX9-NEXT: v_readlane_b32 s31, v44, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v44, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -72,15 +72,19 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v44, s4, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v44, s30, 0 +; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: v_mov_b32_e32 v36, v16 ; GFX10-NEXT: v_mov_b32_e32 v35, v15 ; GFX10-NEXT: v_mov_b32_e32 v34, v14 ; GFX10-NEXT: v_mov_b32_e32 v33, v13 ; GFX10-NEXT: v_mov_b32_e32 v32, v12 -; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v41, 
off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: ;;#ASMSTART @@ -90,14 +94,11 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v44, s4, 2 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v44, s30, 0 -; GFX10-NEXT: v_writelane_b32 v44, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: v_mov_b32_e32 v0, v40 @@ -109,8 +110,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: v_readlane_b32 s30, v44, 0 +; GFX10-NEXT: v_readlane_b32 s31, v44, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s4, v44, 2 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -129,14 +130,21 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 -; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13 -; GFX11-NEXT: 
v_mov_b32_e32 v32, v12 +; GFX11-NEXT: v_writelane_b32 v44, s0, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x3 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 +; GFX11-NEXT: v_writelane_b32 v44, s30, 0 +; GFX11-NEXT: v_writelane_b32 v44, s31, 1 +; GFX11-NEXT: v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15 +; GFX11-NEXT: v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13 +; GFX11-NEXT: v_mov_b32_e32 v32, v12 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ;;#ASMSTART @@ -146,14 +154,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: image_gather4_c_b_cl v[40:43], v[32:36], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: v_writelane_b32 v44, s0, 2 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v44, s30, 0 -; GFX11-NEXT: v_writelane_b32 v44, s31, 1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41 @@ -163,8 +167,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:4 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:8 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:12 -; GFX11-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-NEXT: v_readlane_b32 s30, v44, 0 +; GFX11-NEXT: v_readlane_b32 s31, v44, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v44, 2 ; 
GFX11-NEXT: s_or_saveexec_b32 s1, -1 @@ -206,25 +210,25 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: v_writelane_b32 v45, s4, 2 +; GFX9-NEXT: s_addk_i32 s32, 0x800 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v45, s30, 0 +; GFX9-NEXT: v_writelane_b32 v45, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v44, v16 ; GFX9-NEXT: v_mov_b32_e32 v43, v15 ; GFX9-NEXT: v_mov_b32_e32 v42, v14 ; GFX9-NEXT: v_mov_b32_e32 v41, v13 ; GFX9-NEXT: v_mov_b32_e32 v40, v12 ; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:44], s[4:11], s[4:7] dmask:0x1 -; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v45, s4, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v45, s30, 0 -; GFX9-NEXT: v_writelane_b32 v45, s31, 1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -236,8 +240,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: 
v_readlane_b32 s31, v45, 1 ; GFX9-NEXT: v_readlane_b32 s30, v45, 0 +; GFX9-NEXT: v_readlane_b32 s31, v45, 1 ; GFX9-NEXT: s_mov_b32 s32, s33 ; GFX9-NEXT: v_readlane_b32 s4, v45, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 @@ -256,25 +260,26 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: v_writelane_b32 v45, s4, 2 +; GFX10-NEXT: s_addk_i32 s32, 0x400 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10-NEXT: v_writelane_b32 v45, s30, 0 +; GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v45, s4, 2 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v40, v16 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v41, v15 -; GFX10-NEXT: v_writelane_b32 v45, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v42, v14 ; GFX10-NEXT: v_mov_b32_e32 v43, v13 ; GFX10-NEXT: v_mov_b32_e32 v44, v12 -; GFX10-NEXT: v_writelane_b32 v45, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -286,8 +291,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; 
GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:16 -; GFX10-NEXT: v_readlane_b32 s31, v45, 1 ; GFX10-NEXT: v_readlane_b32 s30, v45, 0 +; GFX10-NEXT: v_readlane_b32 s31, v45, 1 ; GFX10-NEXT: s_mov_b32 s32, s33 ; GFX10-NEXT: v_readlane_b32 s4, v45, 2 ; GFX10-NEXT: s_or_saveexec_b32 s5, -1 @@ -306,24 +311,28 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 ; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:20 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v45, s0, 2 +; GFX11-NEXT: s_add_i32 s32, s32, 32 ; GFX11-NEXT: s_clause 0x4 ; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:16 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:12 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:4 +; GFX11-NEXT: ; meta instruction ; GFX11-NEXT: scratch_store_b32 off, v44, s33 +; GFX11-NEXT: v_writelane_b32 v45, s30, 0 +; GFX11-NEXT: v_writelane_b32 v45, s31, 1 ; GFX11-NEXT: image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: s_add_i32 s32, s32, 32 -; GFX11-NEXT: v_writelane_b32 v45, s0, 2 ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12 ; GFX11-NEXT: v_dual_mov_b32 v40, v16 :: v_dual_mov_b32 v41, v15 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: v_writelane_b32 v45, s30, 0 ; GFX11-NEXT: v_dual_mov_b32 v42, v14 :: v_dual_mov_b32 v43, v13 ; GFX11-NEXT: v_mov_b32_e32 v44, v12 -; GFX11-NEXT: v_writelane_b32 v45, s31, 1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off ; 
GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -335,8 +344,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:12 ; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:16 -; GFX11-NEXT: v_readlane_b32 s31, v45, 1 ; GFX11-NEXT: v_readlane_b32 s30, v45, 0 +; GFX11-NEXT: v_readlane_b32 s31, v45, 1 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_readlane_b32 s0, v45, 2 ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 28c6b40554bb6..110013258bd89 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -3076,18 +3076,18 @@ define void @callee_no_stack_with_call() #1 { ; GFX1032-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_mov_b32 exec_lo, s17 -; GFX1032-NEXT: s_addk_i32 s32, 0x200 ; GFX1032-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1032-NEXT: v_writelane_b32 v40, s30, 0 +; GFX1032-NEXT: s_addk_i32 s32, 0x200 +; GFX1032-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1032-NEXT: s_getpc_b64 s[16:17] ; GFX1032-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX1032-NEXT: v_writelane_b32 v40, s30, 0 -; GFX1032-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1032-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1032-NEXT: v_readlane_b32 s30, v40, 0 +; GFX1032-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1032-NEXT: s_mov_b32 s32, s33 ; GFX1032-NEXT: v_readlane_b32 s4, v40, 2 ; GFX1032-NEXT: s_or_saveexec_b32 s5, -1 @@ -3107,18 +3107,18 @@ define void @callee_no_stack_with_call() #1 { ; GFX1064-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Spill ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_mov_b64 exec, s[18:19] -; GFX1064-NEXT: s_addk_i32 s32, 0x400 ; GFX1064-NEXT: v_writelane_b32 v40, s16, 2 +; GFX1064-NEXT: v_writelane_b32 v40, s30, 0 +; GFX1064-NEXT: s_addk_i32 s32, 0x400 +; GFX1064-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1064-NEXT: s_getpc_b64 s[16:17] ; GFX1064-NEXT: s_add_u32 s16, s16, external_void_func_void@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s17, s17, external_void_func_void@gotpcrel32@hi+12 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 -; GFX1064-NEXT: v_writelane_b32 v40, s30, 0 -; GFX1064-NEXT: v_writelane_b32 v40, s31, 1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1064-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1064-NEXT: v_readlane_b32 s30, v40, 0 +; GFX1064-NEXT: v_readlane_b32 s31, v40, 1 ; GFX1064-NEXT: s_mov_b32 s32, s33 ; GFX1064-NEXT: v_readlane_b32 s4, v40, 2 ; GFX1064-NEXT: s_or_saveexec_b64 s[6:7], -1 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir index adba762235d8c..9b4bd18b986e2 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir @@ -26,8 +26,13 @@ body: | ; CHECK-LABEL: name: save_inactive_lanes_non_csr_vgpr ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc @@ -64,8 +69,12 @@ body: | ; CHECK-LABEL: name: save_all_lanes_csr_vgpr ; CHECK: liveins: $vgpr40 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 0 ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 @@ -101,8 +110,13 @@ body: | ; CHECK-LABEL: name: save_csr_sgpr_to_non_csr_vgpr ; CHECK: liveins: $sgpr20, $vgpr191, $vgpr192 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr192 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr192, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $vgpr192 = 
SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192 ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec @@ -144,8 +158,12 @@ body: | ; CHECK-LABEL: name: save_csr_sgpr_to_csr_vgpr ; CHECK: liveins: $sgpr20, $vgpr191 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 ; CHECK-NEXT: $vcc_lo = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr191, 0 ; CHECK-NEXT: $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191 ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0 @@ -193,11 +211,20 @@ body: | ; CHECK-LABEL: name: vgpr_and_sgpr_csr ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, 
implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr49, 256 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 128 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40 @@ -250,11 +277,21 @@ body: | ; CHECK-LABEL: name: split_orig_exec ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr49 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr20 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr40 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr49, 256 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
offset $vgpr40, 128 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20 ; CHECK-NEXT: $sgpr3 = COPY $vcc_lo @@ -300,16 +337,32 @@ body: | ; CHECK-LABEL: name: vgpr_superregs ; CHECK: liveins: $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr40, $vgpr41, $vgpr42 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr2, 128 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr3, 256 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr4, 384 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, 
addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr5, 512 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr40, 640 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr41, 768 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr42, 896 ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42 ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) @@ -360,6 +413,9 @@ body: | ; CHECK-LABEL: name: dont_restore_used_vgprs ; CHECK: liveins: $vgpr0, $vgpr20, $vgpr40 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr0 ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40 ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0 @@ -398,9 +454,16 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x90, 0x40, 0x94, 0x04, 0x35, 0x24, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair 
$pc_reg, $sgpr30, 32, $sgpr31, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr1 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $sgpr1 ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr0, 0 ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $vgpr1, 128 ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1 ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $exec_lo ; CHECK-NEXT: V_CMPX_EQ_U32_nosdst_e64 $vgpr0, $vgpr1, implicit-def $exec, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index a42c8ac706d27..75e06aed64748 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -20,6 +20,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -44,6 +45,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -68,6 +70,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 
%b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -93,6 +96,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -115,6 +119,7 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -147,6 +152,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -171,6 +177,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -195,6 +202,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; DAGISEL64-NEXT: 
s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -219,6 +227,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -240,6 +249,7 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 % ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -364,15 +374,18 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber CSR ; DAGISEL-NEXT: ;;#ASMEND -; DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL-NEXT: ;;#ASMSTART ; DAGISEL-NEXT: ; clobber non-CSR ; DAGISEL-NEXT: ;;#ASMEND @@ -403,15 +416,18 @@ define 
amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_store_b32 off, v2, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber CSR ; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; clobber non-CSR ; GISEL-NEXT: ;;#ASMEND @@ -442,15 +458,18 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber CSR ; DAGISEL64-NEXT: ;;#ASMEND -; DAGISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; DAGISEL64-NEXT: ;;#ASMSTART ; DAGISEL64-NEXT: ; clobber non-CSR ; DAGISEL64-NEXT: ;;#ASMEND @@ -482,15 +501,18 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 
offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber CSR ; GISEL64-NEXT: ;;#ASMEND -; GISEL64-NEXT: v_writelane_b32 v2, s20, 0 ; GISEL64-NEXT: ;;#ASMSTART ; GISEL64-NEXT: ; clobber non-CSR ; GISEL64-NEXT: ;;#ASMEND @@ -519,17 +541,20 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:16 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: ;;#ASMSTART ; GFX1250-DAGISEL-NEXT: ; clobber CSR ; GFX1250-DAGISEL-NEXT: ;;#ASMEND -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v2, s20, 0 ; GFX1250-DAGISEL-NEXT: ;;#ASMSTART ; GFX1250-DAGISEL-NEXT: ; clobber non-CSR ; GFX1250-DAGISEL-NEXT: ;;#ASMEND @@ -908,6 +933,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x1 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL-NEXT: 
s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -938,6 +964,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x1 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -968,6 +995,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x1 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -998,6 +1026,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x1 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -1025,6 +1054,7 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x1 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1069,8 +1099,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; DAGISEL-NEXT: s_clause 0x3 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_wait_alu 0xfffe @@ -1099,8 +1132,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GISEL-NEXT: s_clause 0x3 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_wait_alu 0xfffe @@ -1129,8 +1165,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; DAGISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; DAGISEL64-NEXT: s_clause 0x3 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_wait_alu 0xfffe @@ -1161,8 +1200,11 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GISEL64-NEXT: s_xor_saveexec_b64 vcc, -1 ; GISEL64-NEXT: s_clause 0x3 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_wait_alu 0xfffe @@ -1190,8 +1232,11 @@ define 
amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 vcc_lo, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1227,10 +1272,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x5 ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9 @@ -1263,10 +1313,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GISEL-NEXT: s_xor_saveexec_b32 s34, -1 ; GISEL-NEXT: s_clause 0x5 ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; 
meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_mov_b32 s0, s5 @@ -1304,10 +1359,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x5 ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_mov_b32_e32 v4, s4 @@ -1343,10 +1403,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GISEL64-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GISEL64-NEXT: s_clause 0x5 ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_mov_b32 s0, s5 @@ -1383,10 +1448,15 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x5 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -1431,170 +1501,308 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v34, s33 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v71, s33 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v116, s33 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v161, s33 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; DAGISEL-NEXT: ; meta instruction 
; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 +; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 +; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 +; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 +; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 ; DAGISEL-NEXT: v_swap_b32 v0, v1 ; DAGISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 -; DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; DAGISEL-NEXT: v_readlane_b32 s30, v40, 1 +; DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; DAGISEL-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL-NEXT: v_readlane_b32 s0, v40, 3 ; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -1767,170 +1975,308 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL-NEXT: 
; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; GISEL-NEXT: ; meta instruction ; 
GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, 
s33 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v230, s33 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v40, s0, 3 +; GISEL-NEXT: s_addk_co_i32 s32, 0x250 +; GISEL-NEXT: v_writelane_b32 v40, s4, 0 +; GISEL-NEXT: v_writelane_b32 v40, s30, 1 +; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 ; GISEL-NEXT: v_swap_b32 v0, v1 ; GISEL-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; GISEL-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL-NEXT: v_writelane_b32 v40, s30, 1 -; GISEL-NEXT: v_writelane_b32 v40, s31, 2 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GISEL-NEXT: v_readlane_b32 s30, v40, 1 +; GISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GISEL-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL-NEXT: v_readlane_b32 s0, v40, 3 ; GISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -2103,171 +2449,309 @@ define amdgpu_gfx_whole_wave 
<2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, 
s33 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v40, s0, 4 -; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 -; DAGISEL64-NEXT: v_swap_b32 v0, v1 -; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi -; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 -; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 +; DAGISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; DAGISEL64-NEXT: v_writelane_b32 v40, s5, 1 ; DAGISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; DAGISEL64-NEXT: v_writelane_b32 v40, s31, 3 +; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 +; DAGISEL64-NEXT: v_swap_b32 v0, v1 +; DAGISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi +; DAGISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; DAGISEL64-NEXT: v_readlane_b32 s30, v40, 2 +; DAGISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; DAGISEL64-NEXT: v_readlane_b32 s5, v40, 1 ; DAGISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; DAGISEL64-NEXT: v_readlane_b32 s0, v40, 4 @@ -2441,171 +2925,309 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL64-NEXT: ; meta instruction ; 
GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 
off, v23, s33 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, 
s33 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:252 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:256 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s33 
offset:264 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 
off, v134, s33 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:384 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v179, s33 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:512 ; GISEL64-NEXT: s_clause 0xf ; 
GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:576 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v40, s0, 4 -; GISEL64-NEXT: v_mov_b32_e32 v2, v0 -; GISEL64-NEXT: v_swap_b32 v0, v1 -; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo -; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 -; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 +; GISEL64-NEXT: v_writelane_b32 v40, s4, 0 ; GISEL64-NEXT: 
v_writelane_b32 v40, s5, 1 ; GISEL64-NEXT: v_writelane_b32 v40, s30, 2 ; GISEL64-NEXT: v_writelane_b32 v40, s31, 3 +; GISEL64-NEXT: v_mov_b32_e32 v2, v0 +; GISEL64-NEXT: v_swap_b32 v0, v1 +; GISEL64-NEXT: s_mov_b32 s0, gfx_callee@abs32@lo +; GISEL64-NEXT: s_mov_b32 s1, gfx_callee@abs32@hi ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL64-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; GISEL64-NEXT: v_readlane_b32 s30, v40, 2 +; GISEL64-NEXT: v_readlane_b32 s31, v40, 3 ; GISEL64-NEXT: v_readlane_b32 s5, v40, 1 ; GISEL64-NEXT: v_readlane_b32 s4, v40, 0 ; GISEL64-NEXT: v_readlane_b32 s0, v40, 4 @@ -2776,933 +3398,1830 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v10, s33 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v28, s33 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v54, s33 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:252 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v96, s33 offset:260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:400 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:472 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:504 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s33 
offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s33 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s33 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, s33 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s33 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s33 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s33 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s33 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s33 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s33 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s33 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s33 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s33 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s33 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s33 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s33 offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s33 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s33 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s33 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s33 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v275*/, s33 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s33 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s33 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s33 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s33 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s33 offset:676 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s33 offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s33 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s33 offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s33 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s33 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s33 offset:700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v287*/, s33 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s33 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s33 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s33 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s33 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s33 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s33 offset:728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s33 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s33 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s33 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v41 /*v297*/, s33 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v298*/, s33 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v299*/, s33 offset:752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s33 offset:756 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s33 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s33 offset:764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s33 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v304*/, s33 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s33 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s33 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s33 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s33 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s33 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s33 offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s33 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s33 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s33 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s33 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s33 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s33 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s33 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s33 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s33 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s33 offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s33 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s33 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s33 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s33 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v325*/, s33 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s33 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s33 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s33 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s33 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s33 
offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s33 offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s33 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s33 offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s33 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s33 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s33 offset:900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v337*/, s33 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s33 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s33 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s33 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s33 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s33 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s33 offset:928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s33 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s33 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s33 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v91 /*v347*/, s33 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v348*/, s33 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v349*/, s33 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s33 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s33 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s33 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s33 offset:968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v354*/, s33 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s33 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s33 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s33 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v358*/, s33 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s33 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s33 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s33 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s33 offset:1004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s33 offset:1008 ; 
GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s33 offset:1012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s33 offset:1016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s33 offset:1020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s33 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s33 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s33 offset:1032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v370*/, s33 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s33 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s33 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s33 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s33 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s33 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s33 offset:1060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s33 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s33 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s33 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s33 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s33 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s33 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s33 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s33 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s33 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s33 offset:1100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s33 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s33 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s33 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s33 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v391*/, s33 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s33 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s33 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s33 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s33 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v140 /*v396*/, s33 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v397*/, s33 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v398*/, s33 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s33 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s33 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s33 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s33 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v403*/, s33 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s33 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s33 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s33 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s33 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s33 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s33 offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s33 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s33 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s33 offset:1204 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s33 offset:1208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s33 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s33 offset:1216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s33 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s33 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s33 offset:1228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v419*/, s33 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s33 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s33 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s33 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s33 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s33 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s33 offset:1256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s33 offset:1260 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s33 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s33 offset:1268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s33 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s33 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s33 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s33 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s33 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s33 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s33 offset:1296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s33 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s33 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s33 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s33 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v440*/, s33 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s33 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s33 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s33 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s33 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v189 /*v445*/, s33 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v446*/, s33 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v447*/, s33 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s33 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s33 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s33 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s33 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v452*/, s33 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s33 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s33 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s33 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s33 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s33 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s33 offset:1388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s33 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s33 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s33 offset:1400 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s33 offset:1404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s33 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s33 offset:1412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s33 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s33 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s33 offset:1424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v468*/, s33 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s33 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s33 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s33 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s33 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s33 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s33 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s33 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s33 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s33 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s33 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s33 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s33 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s33 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s33 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s33 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s33 offset:1492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s33 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s33 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s33 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s33 offset:1508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v489*/, s33 offset:1512 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s33 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s33 offset:1520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s33 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s33 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 
/*v494*/, s33 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v495*/, s33 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v496*/, s33 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s33 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s33 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s33 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s33 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v501*/, s33 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s33 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s33 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s33 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s33 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s33 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s33 offset:1584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s33 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1596 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s33 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s33 offset:1620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v517*/, s33 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s33 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s33 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s33 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s33 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s33 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s33 offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s33 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s33 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s33 offset:1660 
+; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s33 offset:1664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s33 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s33 offset:1672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s33 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s33 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s33 offset:1684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v533*/, s33 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s33 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s33 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s33 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s33 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s33 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s33 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s33 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s33 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s33 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s33 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s33 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s33 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s33 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s33 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s33 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s33 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s33 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s33 offset:1760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s33 offset:1764 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s33 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s33 offset:1772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s33 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s33 offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s33 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s33 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s33 
offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s33 offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s33 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s33 offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s33 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s33 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s33 offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v566*/, s33 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s33 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s33 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s33 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s33 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s33 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s33 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s33 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s33 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s33 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s33 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s33 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s33 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s33 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s33 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s33 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s33 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s33 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s33 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s33 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s33 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s33 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s33 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s33 offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s33 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s33 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s33 
offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s33 offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s33 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s33 offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s33 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s33 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s33 offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v599*/, s33 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s33 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s33 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s33 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s33 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s33 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s33 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s33 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s33 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s33 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s33 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s33 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s33 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s33 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s33 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s33 offset:2012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s33 offset:2016 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s33 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s33 offset:2024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s33 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s33 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, s33 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s33 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s33 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s33 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s33 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 
/*v625*/, s33 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v626*/, s33 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v627*/, s33 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s33 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s33 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s33 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s33 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v632*/, s33 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s33 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s33 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s33 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s33 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s33 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s33 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s33 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s33 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s33 offset:2120 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s33 offset:2124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s33 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s33 offset:2132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s33 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s33 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s33 offset:2144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s33 offset:2148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s33 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s33 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s33 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s33 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v653*/, s33 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s33 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s33 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s33 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s33 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s33 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s33 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s33 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s33 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s33 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s33 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s33 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s33 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s33 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s33 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s33 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s33 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s33 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s33 offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s33 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s33 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v162 /*v674*/, s33 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v675*/, s33 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v676*/, s33 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s33 offset:2264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s33 offset:2268 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s33 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s33 offset:2276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v681*/, s33 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s33 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s33 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s33 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s33 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s33 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s33 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s33 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s33 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s33 offset:2316 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s33 offset:2320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s33 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s33 offset:2328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s33 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s33 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s33 offset:2340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s33 offset:2344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s33 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s33 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s33 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s33 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v702*/, s33 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s33 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s33 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s33 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s33 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s33 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s33 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s33 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s33 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s33 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s33 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s33 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s33 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s33 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s33 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s33 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s33 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s33 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s33 offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s33 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s33 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v211 /*v723*/, s33 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v724*/, s33 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v725*/, s33 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s33 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s33 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s33 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s33 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v730*/, s33 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s33 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s33 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s33 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s33 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s33 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s33 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s33 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s33 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s33 offset:2512 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s33 offset:2516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s33 offset:2520 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s33 offset:2524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s33 offset:2528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s33 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s33 offset:2536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s33 offset:2540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s33 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s33 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s33 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s33 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v751*/, s33 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s33 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s33 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s33 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s33 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s33 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s33 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s33 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s33 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s33 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s33 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s33 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s33 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s33 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s33 offset:2640 +; GFX1250-DAGISEL-NEXT: 
; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s33 offset:2644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s33 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s33 offset:2652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s33 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s33 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s33 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s33 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s33 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s33 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s33 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s33 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s33 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s33 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s33 offset:2696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s33 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s33 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 
/*v788*/, s33 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v789*/, s33 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v790*/, s33 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s33 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s33 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s33 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s33 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v795*/, s33 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s33 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s33 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s33 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s33 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s33 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s33 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s33 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s33 offset:2768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s33 offset:2772 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e 
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s33 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s33 offset:2780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s33 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s33 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s33 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s33 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s33 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s33 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s33 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s33 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s33 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s33 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s33 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s33 offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s33 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s33 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, 
s33 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, s33 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, s33 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s33 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s33 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s33 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s33 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, s33 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s33 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s33 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s33 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s33 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s33 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s33 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s33 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s33 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s33 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s33 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s33 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s33 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s33 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s33 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s33 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s33 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s33 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s33 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s33 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s33 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s33 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s33 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s33 offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s33 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s33 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s33 
offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s33 offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s33 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s33 offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s33 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s33 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s33 offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s33 offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s33 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s33 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s33 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s33 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s33 offset:3020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s33 offset:3024 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s33 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s33 offset:3032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s33 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s33 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s33 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s33 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s33 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s33 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s33 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s33 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s33 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s33 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s33 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s33 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v882*/, s33 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s33 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s33 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s33 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s33 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v119 /*v887*/, s33 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v888*/, s33 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v889*/, s33 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s33 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s33 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s33 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s33 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v894*/, s33 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s33 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s33 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s33 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s33 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s33 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s33 offset:3156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s33 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s33 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s33 offset:3168 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s33 offset:3172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s33 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s33 offset:3180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s33 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s33 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s33 offset:3192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s33 offset:3196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s33 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s33 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s33 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s33 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s33 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s33 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s33 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s33 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s33 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s33 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s33 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s33 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s33 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s33 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s33 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s33 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s33 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s33 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s33 offset:3272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s33 offset:3276 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v931*/, s33 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s33 offset:3284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s33 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s33 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s33 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 
/*v936*/, s33 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v937*/, s33 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v938*/, s33 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s33 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s33 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s33 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s33 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v943*/, s33 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s33 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s33 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s33 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s33 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s33 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s33 offset:3352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s33 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s33 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s33 offset:3364 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s33 offset:3368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s33 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s33 offset:3376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s33 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s33 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s33 offset:3388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s33 offset:3392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s33 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s33 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s33 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s33 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s33 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s33 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s33 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s33 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s33 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s33 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s33 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s33 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s33 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s33 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s33 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s33 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s33 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s33 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s33 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s33 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v980*/, s33 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s33 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s33 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s33 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s33 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v217 /*v985*/, s33 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v986*/, s33 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v987*/, s33 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s33 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s33 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s33 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s33 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v992*/, s33 offset:3524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s33 offset:3528 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s33 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s33 offset:3536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s33 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s33 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s33 offset:3548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s33 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s33 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s33 offset:3560 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s33 offset:3564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s33 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s33 offset:3572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s33 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s33 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s33 offset:3584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s33 offset:3588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s33 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s33 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s33 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s33 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s33 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s33 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s33 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s33 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s33 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s33 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s33 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s33 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s33 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s33 offset:3644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -3710,17 +5229,17 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s0, 3 -; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0 -; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1 -; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], gfx_callee@abs64 -; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 +; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s4, 0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s30, 1 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v40, s31, 2 +; GFX1250-DAGISEL-NEXT: v_mov_b32_e32 v2, v0 +; GFX1250-DAGISEL-NEXT: v_swap_b32 v0, v1 +; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], gfx_callee@abs64 +; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] -; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s30, v40, 1 +; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v40, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s4, v40, 0 ; 
GFX1250-DAGISEL-NEXT: v_readlane_b32 s0, v40, 3 ; GFX1250-DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload @@ -4679,152 +6198,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v39, s32 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, 
v84, s32 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v129, s32 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL-NEXT: ; meta instruction 
; DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL-NEXT: s_mov_b32 
exec_lo, -1 ; DAGISEL-NEXT: v_mov_b32_e32 v2, v0 @@ -4995,152 +6653,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; GISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL-NEXT: 
s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 
+; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, 
v179, s32 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v225, s32 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: v_mov_b32_e32 v2, v0 @@ -5311,152 +7108,291 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v4, s32 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; DAGISEL64-NEXT: ; meta 
instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s32 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; DAGISEL64-NEXT: ; meta instruction ; 
DAGISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; 
DAGISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; 
DAGISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; 
DAGISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: v_mov_b32_e32 v2, v0 @@ -5627,152 +7563,291 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ ; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s32 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v20, s32 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s32 offset:124 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v49, s32 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: 
scratch_store_b32 off, v86, s32 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s32 offset:252 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GISEL64-NEXT: ; meta instruction ; 
GISEL64-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v145, s32 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s32 offset:380 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v179, s32 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s32 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s32 offset:508 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s32 offset:572 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: v_mov_b32_e32 v2, v0 @@ -5940,933 +8015,1830 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ ; 
GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s32 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s32 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s32 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s32 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10, s32 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s32 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s32 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s32 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s32 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s32 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s32 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s32 offset:68 
+; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s32 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s32 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s32 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s32 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s32 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s32 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s32 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s32 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s32 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s32 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28, s32 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s32 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s32 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s32 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s32 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s32 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s32 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s32 offset:140 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s32 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s32 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s32 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s32 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s32 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s32 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s32 offset:168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s32 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s32 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s32 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54, s32 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s32 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s32 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s32 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s32 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s32 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s32 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s32 offset:212 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s32 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s32 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s32 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s32 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s32 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s32 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s32 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s32 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s32 offset:248 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s32 offset:252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96, s32 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s32 offset:260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s32 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s32 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s32 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s32 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s32 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s32 offset:284 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s32 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s32 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s32 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s32 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s32 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s32 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s32 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s32 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s32 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s32 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s32 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s32 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s32 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s32 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s32 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s32 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s32 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, 
s32 offset:356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s32 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s32 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s32 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s32 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s32 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s32 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s32 offset:384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s32 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s32 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s32 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s32 offset:400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s32 offset:404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s32 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s32 offset:412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s32 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s32 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s32 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v179, s32 offset:428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s32 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s32 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s32 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s32 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s32 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s32 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s32 offset:456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s32 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s32 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s32 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, s32 offset:472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, s32 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s32 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s32 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s32 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s32 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, s32 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s32 offset:500 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s32 offset:504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s32 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s32 offset:512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s32 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s32 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s32 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s32 offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s32 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s32 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s32 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240, s32 offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241, s32 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s32 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s32 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s32 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s32 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246, s32 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s32 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s32 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s32 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, s32 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s32 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s32 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s32 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s32 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s32 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s32 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s32 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s32 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s32 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s32 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s32 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s32 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s32 offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s32 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s32 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s32 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v275*/, s32 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s32 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s32 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s32 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s32 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s32 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s32 offset:676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s32 offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s32 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s32 offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s32 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s32 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v287*/, s32 offset:700 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s32 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s32 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s32 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s32 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s32 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s32 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s32 offset:728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s32 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s32 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v297*/, s32 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v298*/, s32 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v299*/, s32 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s32 offset:752 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s32 offset:756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s32 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s32 offset:764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v48 /*v304*/, s32 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s32 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s32 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s32 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s32 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s32 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s32 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s32 offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s32 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s32 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s32 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s32 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s32 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s32 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s32 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s32 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s32 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s32 offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s32 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s32 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s32 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v325*/, s32 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s32 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s32 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s32 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s32 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s32 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s32 offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s32 offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s32 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s32 offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s32 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s32 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v337*/, s32 offset:900 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s32 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s32 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s32 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s32 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s32 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s32 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s32 offset:928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s32 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s32 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v347*/, s32 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v348*/, s32 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v349*/, s32 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s32 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s32 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s32 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s32 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v98 /*v354*/, s32 offset:968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s32 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s32 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s32 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v358*/, s32 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s32 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s32 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s32 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s32 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s32 offset:1004 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s32 offset:1008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s32 offset:1012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s32 offset:1016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s32 offset:1020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s32 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s32 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v370*/, s32 offset:1032 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s32 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s32 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s32 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s32 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s32 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s32 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s32 offset:1060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s32 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s32 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s32 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s32 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s32 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s32 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s32 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s32 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s32 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s32 offset:1100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s32 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s32 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s32 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v391*/, s32 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s32 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s32 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s32 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s32 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v396*/, s32 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v397*/, s32 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v398*/, s32 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s32 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s32 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s32 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s32 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v147 /*v403*/, s32 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s32 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s32 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s32 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s32 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s32 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s32 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s32 offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s32 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s32 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s32 offset:1204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s32 offset:1208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s32 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s32 offset:1216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s32 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s32 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v419*/, s32 offset:1228 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s32 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s32 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s32 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s32 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s32 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s32 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s32 offset:1256 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s32 offset:1260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s32 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s32 offset:1268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s32 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s32 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s32 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s32 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s32 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s32 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s32 offset:1296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s32 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s32 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s32 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v440*/, s32 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s32 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s32 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s32 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s32 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v445*/, s32 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v446*/, s32 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v447*/, s32 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s32 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s32 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s32 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s32 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v196 /*v452*/, s32 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s32 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s32 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s32 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s32 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s32 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s32 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s32 offset:1388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s32 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s32 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s32 offset:1400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s32 offset:1404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s32 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s32 offset:1412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s32 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s32 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v468*/, s32 offset:1424 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s32 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s32 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s32 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s32 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s32 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s32 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s32 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s32 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s32 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s32 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s32 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s32 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s32 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s32 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s32 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s32 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s32 offset:1492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s32 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s32 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s32 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v489*/, s32 offset:1508 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s32 offset:1512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s32 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s32 offset:1520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s32 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v494*/, s32 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v495*/, s32 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v496*/, s32 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s32 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s32 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s32 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s32 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 
/*v501*/, s32 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s32 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s32 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s32 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s32 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s32 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s32 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s32 offset:1584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s32 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s32 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s32 offset:1596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s32 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s32 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s32 offset:1608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s32 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s32 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v5 /*v517*/, s32 offset:1620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s32 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s32 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s32 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s32 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s32 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s32 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s32 offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s32 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s32 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s32 offset:1660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s32 offset:1664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s32 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s32 offset:1672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s32 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s32 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v533*/, s32 offset:1684 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s32 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s32 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s32 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s32 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s32 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s32 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s32 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s32 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s32 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s32 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s32 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s32 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s32 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s32 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s32 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s32 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s32 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s32 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s32 offset:1760 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s32 offset:1764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s32 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s32 offset:1772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s32 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s32 offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s32 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s32 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s32 offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s32 offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s32 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s32 offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s32 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s32 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v566*/, s32 
offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s32 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s32 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s32 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s32 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s32 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s32 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s32 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s32 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s32 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s32 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s32 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s32 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s32 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s32 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s32 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s32 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s32 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s32 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s32 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s32 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s32 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s32 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s32 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s32 offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s32 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s32 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s32 offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s32 offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s32 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s32 offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s32 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s32 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v599*/, s32 
offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s32 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s32 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s32 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s32 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s32 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s32 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s32 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s32 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s32 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s32 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s32 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s32 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s32 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s32 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s32 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s32 offset:2012 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s32 offset:2016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s32 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s32 offset:2024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s32 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, s32 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s32 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s32 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s32 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s32 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v625*/, s32 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v626*/, s32 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v627*/, s32 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s32 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s32 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s32 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s32 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v120 /*v632*/, s32 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s32 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s32 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s32 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s32 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s32 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s32 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s32 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s32 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s32 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s32 offset:2120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s32 offset:2124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s32 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s32 offset:2132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s32 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s32 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s32 offset:2144 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s32 offset:2148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s32 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s32 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s32 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v653*/, s32 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s32 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s32 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s32 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s32 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s32 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s32 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s32 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s32 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s32 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s32 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s32 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s32 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s32 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s32 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s32 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s32 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s32 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s32 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s32 offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s32 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v674*/, s32 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v675*/, s32 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v676*/, s32 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s32 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s32 offset:2264 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s32 offset:2268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s32 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 
/*v681*/, s32 offset:2276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s32 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s32 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s32 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s32 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s32 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s32 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s32 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s32 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s32 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s32 offset:2316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s32 offset:2320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s32 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s32 offset:2328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s32 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s32 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s32 offset:2340 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s32 offset:2344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s32 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s32 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s32 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v702*/, s32 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s32 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s32 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s32 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s32 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s32 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s32 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s32 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s32 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s32 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s32 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s32 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s32 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s32 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s32 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s32 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s32 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s32 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s32 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s32 offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s32 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v723*/, s32 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v724*/, s32 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v725*/, s32 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s32 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s32 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s32 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s32 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v218 /*v730*/, s32 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s32 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s32 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s32 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s32 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s32 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s32 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s32 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s32 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s32 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s32 offset:2512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s32 offset:2516 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s32 offset:2520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s32 offset:2524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s32 offset:2528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s32 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s32 offset:2536 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s32 offset:2540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s32 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s32 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s32 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v751*/, s32 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s32 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s32 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s32 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s32 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s32 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s32 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s32 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s32 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s32 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s32 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s32 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s32 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s32 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s32 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s32 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s32 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s32 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s32 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s32 offset:2632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s32 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s32 offset:2640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s32 offset:2644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s32 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s32 offset:2652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s32 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s32 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s32 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s32 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s32 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s32 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s32 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s32 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s32 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s32 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s32 offset:2696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s32 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v788*/, s32 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v789*/, s32 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v790*/, s32 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s32 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s32 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s32 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s32 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 
/*v795*/, s32 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s32 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s32 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s32 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s32 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s32 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s32 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s32 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s32 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s32 offset:2768 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s32 offset:2772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s32 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s32 offset:2780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s32 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s32 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s32 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s32 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction 
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s32 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s32 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s32 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s32 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s32 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s32 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s32 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s32 offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s32 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, s32 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, s32 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, s32 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s32 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s32 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s32 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s32 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, 
s32 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s32 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s32 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s32 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s32 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s32 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s32 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s32 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s32 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s32 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s32 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s32 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s32 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s32 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s32 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s32 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s32 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s32 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s32 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s32 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s32 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s32 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s32 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s32 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s32 offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s32 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s32 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s32 offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s32 offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s32 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s32 offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s32 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s32 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s32 
offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s32 offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s32 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s32 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s32 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s32 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s32 offset:3020 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s32 offset:3024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s32 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s32 offset:3032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s32 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s32 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s32 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s32 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s32 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s32 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s32 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s32 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s32 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s32 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s32 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v882*/, s32 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s32 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s32 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s32 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s32 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v887*/, s32 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v888*/, s32 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v889*/, s32 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s32 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s32 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s32 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s32 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v126 /*v894*/, s32 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s32 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s32 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s32 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s32 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s32 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s32 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s32 offset:3156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s32 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s32 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s32 offset:3168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s32 offset:3172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s32 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s32 offset:3180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s32 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s32 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s32 offset:3192 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s32 offset:3196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s32 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s32 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s32 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s32 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s32 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s32 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s32 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s32 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s32 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s32 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s32 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s32 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s32 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s32 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s32 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s32 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s32 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s32 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s32 offset:3272 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v931*/, s32 offset:3276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s32 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s32 offset:3284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s32 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s32 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v936*/, s32 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v937*/, s32 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v938*/, s32 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s32 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s32 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s32 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s32 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 
/*v943*/, s32 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s32 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s32 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s32 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s32 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s32 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s32 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s32 offset:3352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s32 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s32 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s32 offset:3364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s32 offset:3368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s32 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s32 offset:3376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s32 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s32 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s32 offset:3388 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s32 offset:3392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s32 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s32 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s32 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s32 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s32 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s32 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s32 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s32 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s32 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s32 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s32 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s32 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s32 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s32 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s32 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s32 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s32 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s32 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s32 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v980*/, s32 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s32 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s32 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s32 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s32 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v985*/, s32 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v986*/, s32 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v987*/, s32 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s32 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s32 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s32 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s32 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v224 /*v992*/, s32 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s32 offset:3524 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s32 offset:3528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s32 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s32 offset:3536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s32 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s32 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s32 offset:3548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s32 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s32 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s32 offset:3560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s32 offset:3564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s32 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s32 offset:3572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s32 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s32 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s32 offset:3584 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s32 offset:3588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s32 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s32 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s32 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s32 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s32 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s32 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s32 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s32 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s32 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s32 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s32 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s32 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s32 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s32 offset:3644 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 @@ -7884,172 +10856,313 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL-NEXT: ; meta instruction ; 
DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v49, s33 offset:176 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: 
scratch_store_b32 off, v86, s33 offset:260 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; DAGISEL-NEXT: ; meta instruction ; 
DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; DAGISEL-NEXT: s_clause 0x1f ; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; DAGISEL-NEXT: ; meta 
instruction ; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; 
DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; DAGISEL-NEXT: s_clause 0xf ; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; DAGISEL-NEXT: s_clause 0x2 ; DAGISEL-NEXT: scratch_store_b32 off, v42, s33 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 
off, v40, s33 offset:164 +; DAGISEL-NEXT: ; meta instruction ; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 -; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo ; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 ; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 ; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 +; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 ; DAGISEL-NEXT: s_wait_alu 0xfffe ; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; DAGISEL-NEXT: v_readlane_b32 s30, v42, 1 +; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; DAGISEL-NEXT: v_readlane_b32 s4, v42, 0 ; DAGISEL-NEXT: v_readlane_b32 s0, v42, 3 ; DAGISEL-NEXT: s_clause 0x2 @@ -8225,172 +11338,313 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL-NEXT: ; 
meta instruction ; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GISEL-NEXT: ; 
meta instruction ; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; 
GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; GISEL-NEXT: s_clause 0x1f ; GISEL-NEXT: scratch_store_b32 off, v160, 
s33 offset:396 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: 
scratch_store_b32 off, v198, s33 offset:484 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; GISEL-NEXT: s_clause 0xf ; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GISEL-NEXT: ; meta 
instruction ; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; GISEL-NEXT: s_mov_b32 exec_lo, -1 ; GISEL-NEXT: s_clause 0x2 ; GISEL-NEXT: scratch_store_b32 off, v42, s33 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GISEL-NEXT: ; meta instruction ; GISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: v_writelane_b32 v42, s0, 3 -; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi ; GISEL-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 ; GISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GISEL-NEXT: v_writelane_b32 v42, s30, 1 ; GISEL-NEXT: v_writelane_b32 v42, s31, 2 +; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 ; GISEL-NEXT: s_wait_alu 0xfffe ; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL-NEXT: flat_store_b32 v[40:41], v0 -; GISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GISEL-NEXT: v_readlane_b32 s30, v42, 1 +; GISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GISEL-NEXT: v_readlane_b32 s4, v42, 0 ; GISEL-NEXT: v_readlane_b32 s0, v42, 3 ; GISEL-NEXT: s_clause 0x2 @@ -8566,174 +11820,315 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; DAGISEL64-NEXT: ; meta 
instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: 
scratch_store_b32 off, v23, s33 offset:96 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, 
v51, s33 offset:184 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 
; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; DAGISEL64-NEXT: s_clause 0x1f ; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; 
DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; DAGISEL64-NEXT: s_clause 0xf ; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; 
DAGISEL64-NEXT: s_mov_b64 exec, -1 ; DAGISEL64-NEXT: s_clause 0x2 ; DAGISEL64-NEXT: scratch_store_b32 off, v42, s33 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; DAGISEL64-NEXT: ; meta instruction ; DAGISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: v_writelane_b32 v42, s0, 4 -; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo ; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 ; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 ; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1 ; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2 ; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3 +; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi +; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo +; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 +; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 ; DAGISEL64-NEXT: s_wait_alu 0xfffe ; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; DAGISEL64-NEXT: v_readlane_b32 s30, v42, 2 +; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; DAGISEL64-NEXT: v_readlane_b32 s5, v42, 1 ; DAGISEL64-NEXT: v_readlane_b32 s4, v42, 0 ; DAGISEL64-NEXT: v_readlane_b32 s0, v42, 4 @@ -8910,174 +12305,315 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GISEL64-NEXT: ; meta 
instruction ; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; 
GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 ; GISEL64-NEXT: s_clause 0x1f ; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v181, s33 
offset:448 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 ; GISEL64-NEXT: s_clause 0xf ; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, 
v226, s33 offset:532 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 ; GISEL64-NEXT: s_mov_b64 exec, -1 ; GISEL64-NEXT: s_clause 0x2 ; GISEL64-NEXT: scratch_store_b32 off, v42, s33 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GISEL64-NEXT: ; meta instruction ; GISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: v_writelane_b32 v42, s0, 4 -; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi ; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL64-NEXT: v_mov_b32_e32 v40, v8 ; GISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; GISEL64-NEXT: v_mov_b32_e32 v41, v9 ; GISEL64-NEXT: v_writelane_b32 v42, s5, 1 ; GISEL64-NEXT: v_writelane_b32 v42, s30, 2 ; 
GISEL64-NEXT: v_writelane_b32 v42, s31, 3 +; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo +; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi +; GISEL64-NEXT: v_mov_b32_e32 v40, v8 +; GISEL64-NEXT: v_mov_b32_e32 v41, v9 ; GISEL64-NEXT: s_wait_alu 0xfffe ; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GISEL64-NEXT: flat_store_b32 v[40:41], v0 -; GISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; GISEL64-NEXT: v_readlane_b32 s30, v42, 2 +; GISEL64-NEXT: v_readlane_b32 s31, v42, 3 ; GISEL64-NEXT: v_readlane_b32 s5, v42, 1 ; GISEL64-NEXT: v_readlane_b32 s4, v42, 0 ; GISEL64-NEXT: v_readlane_b32 s0, v42, 4 @@ -9251,954 +12787,1853 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> ; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198, 
s33 offset:484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 
off, v240, s33 offset:556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v256*/, s33 offset:588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v257*/, s33 offset:592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v258*/, s33 offset:596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v259*/, s33 offset:600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v260*/, s33 offset:604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v261*/, s33 offset:608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v262*/, s33 offset:612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v263*/, s33 offset:616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v264*/, s33 offset:620 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v265*/, s33 offset:624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v266*/, s33 offset:628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v267*/, s33 offset:632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v268*/, s33 offset:636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v269*/, s33 offset:640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v270*/, s33 offset:644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v271*/, s33 offset:648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v272*/, s33 offset:652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v273*/, s33 offset:656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v274*/, s33 offset:660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v275*/, s33 offset:664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v276*/, s33 offset:668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v277*/, s33 offset:672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v278*/, s33 offset:676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v279*/, s33 offset:680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v280*/, s33 offset:684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v281*/, s33 
offset:688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v282*/, s33 offset:692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v283*/, s33 offset:696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v284*/, s33 offset:700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v285*/, s33 offset:704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v286*/, s33 offset:708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v287*/, s33 offset:712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v288*/, s33 offset:716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v289*/, s33 offset:720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v290*/, s33 offset:724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v291*/, s33 offset:728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v292*/, s33 offset:732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v293*/, s33 offset:736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v294*/, s33 offset:740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v295*/, s33 offset:744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v296*/, s33 offset:748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v297*/, s33 offset:752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v42 /*v298*/, s33 offset:756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v299*/, s33 offset:760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v300*/, s33 offset:764 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v301*/, s33 offset:768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v302*/, s33 offset:772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v303*/, s33 offset:776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v304*/, s33 offset:780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v305*/, s33 offset:784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v306*/, s33 offset:788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v307*/, s33 offset:792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v308*/, s33 offset:796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v309*/, s33 offset:800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v310*/, s33 offset:804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v311*/, s33 offset:808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v312*/, s33 offset:812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v313*/, s33 offset:816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v314*/, s33 offset:820 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v315*/, s33 offset:824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v316*/, s33 offset:828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v317*/, s33 offset:832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v318*/, s33 offset:836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v319*/, s33 offset:840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v320*/, s33 offset:844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v321*/, s33 offset:848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v322*/, s33 offset:852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v323*/, s33 offset:856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v324*/, s33 offset:860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v325*/, s33 offset:864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v326*/, s33 offset:868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v327*/, s33 offset:872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v328*/, s33 offset:876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v329*/, s33 offset:880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v330*/, s33 offset:884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v331*/, s33 
offset:888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v332*/, s33 offset:892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v333*/, s33 offset:896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v334*/, s33 offset:900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v335*/, s33 offset:904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v336*/, s33 offset:908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v337*/, s33 offset:912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v338*/, s33 offset:916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v339*/, s33 offset:920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v340*/, s33 offset:924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v341*/, s33 offset:928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v342*/, s33 offset:932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v343*/, s33 offset:936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v344*/, s33 offset:940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v345*/, s33 offset:944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v346*/, s33 offset:948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v347*/, s33 offset:952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: 
scratch_store_b32 off, v92 /*v348*/, s33 offset:956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v349*/, s33 offset:960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v350*/, s33 offset:964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v351*/, s33 offset:968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v352*/, s33 offset:972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v353*/, s33 offset:976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v354*/, s33 offset:980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v355*/, s33 offset:984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v356*/, s33 offset:988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v357*/, s33 offset:992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v358*/, s33 offset:996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v359*/, s33 offset:1000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v360*/, s33 offset:1004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v361*/, s33 offset:1008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v362*/, s33 offset:1012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v363*/, s33 offset:1016 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v364*/, s33 offset:1020 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v365*/, s33 offset:1024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v366*/, s33 offset:1028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v367*/, s33 offset:1032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v368*/, s33 offset:1036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v369*/, s33 offset:1040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v370*/, s33 offset:1044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v371*/, s33 offset:1048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v372*/, s33 offset:1052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v373*/, s33 offset:1056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v374*/, s33 offset:1060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v375*/, s33 offset:1064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v376*/, s33 offset:1068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v377*/, s33 offset:1072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v378*/, s33 offset:1076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v379*/, s33 offset:1080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v380*/, s33 offset:1084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v381*/, s33 offset:1088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v382*/, s33 offset:1092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v383*/, s33 offset:1096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v384*/, s33 offset:1100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v385*/, s33 offset:1104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v386*/, s33 offset:1108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v387*/, s33 offset:1112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v388*/, s33 offset:1116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v389*/, s33 offset:1120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v390*/, s33 offset:1124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v391*/, s33 offset:1128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v392*/, s33 offset:1132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v393*/, s33 offset:1136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v394*/, s33 offset:1140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v395*/, s33 offset:1144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v396*/, s33 offset:1148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v141 /*v397*/, s33 offset:1152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v398*/, s33 offset:1156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v399*/, s33 offset:1160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v400*/, s33 offset:1164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v401*/, s33 offset:1168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v402*/, s33 offset:1172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v403*/, s33 offset:1176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v404*/, s33 offset:1180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v405*/, s33 offset:1184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v406*/, s33 offset:1188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v407*/, s33 offset:1192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v408*/, s33 offset:1196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v409*/, s33 offset:1200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v410*/, s33 offset:1204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v411*/, s33 offset:1208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v412*/, s33 offset:1212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v413*/, s33 offset:1216 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v414*/, s33 offset:1220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v415*/, s33 offset:1224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v416*/, s33 offset:1228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v417*/, s33 offset:1232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v418*/, s33 offset:1236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v419*/, s33 offset:1240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v420*/, s33 offset:1244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v421*/, s33 offset:1248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v422*/, s33 offset:1252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v423*/, s33 offset:1256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v424*/, s33 offset:1260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v425*/, s33 offset:1264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v426*/, s33 offset:1268 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v427*/, s33 offset:1272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v428*/, s33 offset:1276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v429*/, s33 offset:1280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v430*/, s33 offset:1284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v431*/, s33 offset:1288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v432*/, s33 offset:1292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v433*/, s33 offset:1296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v434*/, s33 offset:1300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v435*/, s33 offset:1304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v436*/, s33 offset:1308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v437*/, s33 offset:1312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v438*/, s33 offset:1316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v439*/, s33 offset:1320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v440*/, s33 offset:1324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v441*/, s33 offset:1328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v442*/, s33 offset:1332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v443*/, s33 offset:1336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v444*/, s33 offset:1340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v445*/, s33 offset:1344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v190 /*v446*/, s33 offset:1348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v447*/, s33 offset:1352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v448*/, s33 offset:1356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v449*/, s33 offset:1360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v450*/, s33 offset:1364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v451*/, s33 offset:1368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v452*/, s33 offset:1372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v453*/, s33 offset:1376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v454*/, s33 offset:1380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v455*/, s33 offset:1384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v456*/, s33 offset:1388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v457*/, s33 offset:1392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v458*/, s33 offset:1396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v459*/, s33 offset:1400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v460*/, s33 offset:1404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v461*/, s33 offset:1408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v462*/, s33 offset:1412 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v463*/, s33 offset:1416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v464*/, s33 offset:1420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v465*/, s33 offset:1424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v466*/, s33 offset:1428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v467*/, s33 offset:1432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v468*/, s33 offset:1436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v469*/, s33 offset:1440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v470*/, s33 offset:1444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v471*/, s33 offset:1448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v472*/, s33 offset:1452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v473*/, s33 offset:1456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v474*/, s33 offset:1460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v475*/, s33 offset:1464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v476*/, s33 offset:1468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v477*/, s33 offset:1472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v478*/, s33 offset:1476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v479*/, s33 offset:1480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v480*/, s33 offset:1484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v481*/, s33 offset:1488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v482*/, s33 offset:1492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v483*/, s33 offset:1496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v484*/, s33 offset:1500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v485*/, s33 offset:1504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v486*/, s33 offset:1508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v487*/, s33 offset:1512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v488*/, s33 offset:1516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v489*/, s33 offset:1520 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v490*/, s33 offset:1524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v491*/, s33 offset:1528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v492*/, s33 offset:1532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v493*/, s33 offset:1536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v494*/, s33 offset:1540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 
/*v495*/, s33 offset:1544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v496*/, s33 offset:1548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v497*/, s33 offset:1552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v498*/, s33 offset:1556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v499*/, s33 offset:1560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v500*/, s33 offset:1564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v501*/, s33 offset:1568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v502*/, s33 offset:1572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v503*/, s33 offset:1576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v504*/, s33 offset:1580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v505*/, s33 offset:1584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v506*/, s33 offset:1588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v507*/, s33 offset:1592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v508*/, s33 offset:1596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v509*/, s33 offset:1600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v510*/, s33 offset:1604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v511*/, s33 offset:1608 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v512*/, s33 offset:1612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v513*/, s33 offset:1616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v514*/, s33 offset:1620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v515*/, s33 offset:1624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v516*/, s33 offset:1628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v517*/, s33 offset:1632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v518*/, s33 offset:1636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v519*/, s33 offset:1640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v520*/, s33 offset:1644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v521*/, s33 offset:1648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v522*/, s33 offset:1652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v523*/, s33 offset:1656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v524*/, s33 offset:1660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v525*/, s33 offset:1664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v526*/, s33 offset:1668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v527*/, s33 offset:1672 
+; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v528*/, s33 offset:1676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v529*/, s33 offset:1680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v530*/, s33 offset:1684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v531*/, s33 offset:1688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v532*/, s33 offset:1692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 /*v533*/, s33 offset:1696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v534*/, s33 offset:1700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v535*/, s33 offset:1704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v536*/, s33 offset:1708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v537*/, s33 offset:1712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v538*/, s33 offset:1716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v539*/, s33 offset:1720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v540*/, s33 offset:1724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v541*/, s33 offset:1728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v542*/, s33 offset:1732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v543*/, s33 offset:1736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v544*/, s33 offset:1740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v545*/, s33 offset:1744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v546*/, s33 offset:1748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v547*/, s33 offset:1752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v548*/, s33 offset:1756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v549*/, s33 offset:1760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v550*/, s33 offset:1764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v551*/, s33 offset:1768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v552*/, s33 offset:1772 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v553*/, s33 offset:1776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v554*/, s33 offset:1780 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v555*/, s33 offset:1784 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v556*/, s33 offset:1788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v557*/, s33 offset:1792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v558*/, s33 offset:1796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v559*/, s33 offset:1800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v560*/, s33 
offset:1804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v561*/, s33 offset:1808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v562*/, s33 offset:1812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v563*/, s33 offset:1816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v564*/, s33 offset:1820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v565*/, s33 offset:1824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v566*/, s33 offset:1828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v567*/, s33 offset:1832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v568*/, s33 offset:1836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v569*/, s33 offset:1840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v570*/, s33 offset:1844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v571*/, s33 offset:1848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v572*/, s33 offset:1852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v573*/, s33 offset:1856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v574*/, s33 offset:1860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v575*/, s33 offset:1864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v576*/, s33 offset:1868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v577*/, s33 offset:1872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v578*/, s33 offset:1876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v579*/, s33 offset:1880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v580*/, s33 offset:1884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v581*/, s33 offset:1888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v582*/, s33 offset:1892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v583*/, s33 offset:1896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v584*/, s33 offset:1900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v585*/, s33 offset:1904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v586*/, s33 offset:1908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v587*/, s33 offset:1912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v588*/, s33 offset:1916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v589*/, s33 offset:1920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v590*/, s33 offset:1924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v591*/, s33 offset:1928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v592*/, s33 offset:1932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v593*/, s33 
offset:1936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v594*/, s33 offset:1940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v595*/, s33 offset:1944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v596*/, s33 offset:1948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v597*/, s33 offset:1952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v598*/, s33 offset:1956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v599*/, s33 offset:1960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v600*/, s33 offset:1964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v601*/, s33 offset:1968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v602*/, s33 offset:1972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v603*/, s33 offset:1976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v604*/, s33 offset:1980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v605*/, s33 offset:1984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v606*/, s33 offset:1988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v607*/, s33 offset:1992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v608*/, s33 offset:1996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v609*/, s33 offset:2000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v610*/, s33 offset:2004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v611*/, s33 offset:2008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v612*/, s33 offset:2012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v613*/, s33 offset:2016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v614*/, s33 offset:2020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v615*/, s33 offset:2024 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v616*/, s33 offset:2028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v617*/, s33 offset:2032 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v618*/, s33 offset:2036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v619*/, s33 offset:2040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v620*/, s33 offset:2044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v621*/, s33 offset:2048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v622*/, s33 offset:2052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v623*/, s33 offset:2056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v624*/, s33 offset:2060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v625*/, s33 offset:2064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 
/*v626*/, s33 offset:2068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v627*/, s33 offset:2072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v628*/, s33 offset:2076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v629*/, s33 offset:2080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v630*/, s33 offset:2084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v631*/, s33 offset:2088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v120 /*v632*/, s33 offset:2092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v633*/, s33 offset:2096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v634*/, s33 offset:2100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v635*/, s33 offset:2104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v636*/, s33 offset:2108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v637*/, s33 offset:2112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v638*/, s33 offset:2116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v639*/, s33 offset:2120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v640*/, s33 offset:2124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v641*/, s33 offset:2128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v642*/, s33 offset:2132 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v643*/, s33 offset:2136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v644*/, s33 offset:2140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v645*/, s33 offset:2144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v646*/, s33 offset:2148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v647*/, s33 offset:2152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v648*/, s33 offset:2156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v649*/, s33 offset:2160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v650*/, s33 offset:2164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v651*/, s33 offset:2168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v652*/, s33 offset:2172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v653*/, s33 offset:2176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v654*/, s33 offset:2180 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v655*/, s33 offset:2184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v656*/, s33 offset:2188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v657*/, s33 offset:2192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v658*/, s33 offset:2196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v659*/, s33 offset:2200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v660*/, s33 offset:2204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v661*/, s33 offset:2208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v662*/, s33 offset:2212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v663*/, s33 offset:2216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v664*/, s33 offset:2220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v665*/, s33 offset:2224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v666*/, s33 offset:2228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v667*/, s33 offset:2232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v668*/, s33 offset:2236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v669*/, s33 offset:2240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v670*/, s33 offset:2244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v671*/, s33 offset:2248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v672*/, s33 offset:2252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v673*/, s33 offset:2256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v674*/, s33 offset:2260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v163 /*v675*/, s33 offset:2264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v676*/, s33 offset:2268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v677*/, s33 offset:2272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v678*/, s33 offset:2276 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v679*/, s33 offset:2280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v680*/, s33 offset:2284 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 /*v681*/, s33 offset:2288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v682*/, s33 offset:2292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v683*/, s33 offset:2296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v684*/, s33 offset:2300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v685*/, s33 offset:2304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v686*/, s33 offset:2308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v687*/, s33 offset:2312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v688*/, s33 offset:2316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v689*/, s33 offset:2320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v690*/, s33 offset:2324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v691*/, s33 offset:2328 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v692*/, s33 offset:2332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v693*/, s33 offset:2336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v694*/, s33 offset:2340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v695*/, s33 offset:2344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v696*/, s33 offset:2348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v697*/, s33 offset:2352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v698*/, s33 offset:2356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v699*/, s33 offset:2360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v700*/, s33 offset:2364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v701*/, s33 offset:2368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v702*/, s33 offset:2372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v703*/, s33 offset:2376 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v704*/, s33 offset:2380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v705*/, s33 offset:2384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v706*/, s33 offset:2388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v707*/, s33 offset:2392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v708*/, s33 offset:2396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v709*/, s33 offset:2400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v710*/, s33 offset:2404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v711*/, s33 offset:2408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v712*/, s33 offset:2412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v713*/, s33 offset:2416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v714*/, s33 offset:2420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v715*/, s33 offset:2424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v716*/, s33 offset:2428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v717*/, s33 offset:2432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v718*/, s33 offset:2436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v719*/, s33 offset:2440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v720*/, s33 offset:2444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v721*/, s33 offset:2448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v722*/, s33 offset:2452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v723*/, s33 offset:2456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v212 /*v724*/, s33 offset:2460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v725*/, s33 offset:2464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v726*/, s33 offset:2468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v727*/, s33 offset:2472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v728*/, s33 offset:2476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v729*/, s33 offset:2480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v218 /*v730*/, s33 offset:2484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v731*/, s33 offset:2488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v732*/, s33 offset:2492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v733*/, s33 offset:2496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v734*/, s33 offset:2500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v735*/, s33 offset:2504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v736*/, s33 offset:2508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v737*/, s33 offset:2512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v738*/, s33 offset:2516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v739*/, s33 offset:2520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v740*/, s33 offset:2524 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v741*/, s33 offset:2528 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v742*/, s33 offset:2532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v743*/, s33 offset:2536 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v744*/, s33 offset:2540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v745*/, s33 offset:2544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v746*/, s33 offset:2548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v747*/, s33 offset:2552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v748*/, s33 offset:2556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v749*/, s33 offset:2560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v750*/, s33 offset:2564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v751*/, s33 offset:2568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v752*/, s33 offset:2572 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v753*/, s33 offset:2576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v754*/, s33 offset:2580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v755*/, s33 offset:2584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v756*/, s33 offset:2588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v757*/, s33 offset:2592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v758*/, s33 offset:2596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v759*/, s33 offset:2600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v760*/, s33 offset:2604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v761*/, s33 offset:2608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v762*/, s33 offset:2612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v763*/, s33 offset:2616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v764*/, s33 offset:2620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v765*/, s33 offset:2624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v766*/, s33 offset:2628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v767*/, s33 offset:2632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0 /*v768*/, s33 offset:2636 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v1 /*v769*/, s33 offset:2640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v2 /*v770*/, s33 offset:2644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v3 /*v771*/, s33 offset:2648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v4 /*v772*/, s33 offset:2652 +; GFX1250-DAGISEL-NEXT: ; 
meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v5 /*v773*/, s33 offset:2656 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v6 /*v774*/, s33 offset:2660 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v7 /*v775*/, s33 offset:2664 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v8 /*v776*/, s33 offset:2668 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v9 /*v777*/, s33 offset:2672 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v10 /*v778*/, s33 offset:2676 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v11 /*v779*/, s33 offset:2680 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v12 /*v780*/, s33 offset:2684 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v13 /*v781*/, s33 offset:2688 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v14 /*v782*/, s33 offset:2692 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v15 /*v783*/, s33 offset:2696 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v16 /*v784*/, s33 offset:2700 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v17 /*v785*/, s33 offset:2704 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v18 /*v786*/, s33 offset:2708 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v19 /*v787*/, s33 offset:2712 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v20 /*v788*/, s33 offset:2716 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v21 
/*v789*/, s33 offset:2720 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v22 /*v790*/, s33 offset:2724 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v23 /*v791*/, s33 offset:2728 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v24 /*v792*/, s33 offset:2732 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v25 /*v793*/, s33 offset:2736 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v26 /*v794*/, s33 offset:2740 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v27 /*v795*/, s33 offset:2744 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v28 /*v796*/, s33 offset:2748 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v29 /*v797*/, s33 offset:2752 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v30 /*v798*/, s33 offset:2756 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v31 /*v799*/, s33 offset:2760 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v32 /*v800*/, s33 offset:2764 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v33 /*v801*/, s33 offset:2768 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v34 /*v802*/, s33 offset:2772 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v35 /*v803*/, s33 offset:2776 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v36 /*v804*/, s33 offset:2780 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v37 /*v805*/, s33 offset:2784 +; GFX1250-DAGISEL-NEXT: ; meta instruction 
; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v38 /*v806*/, s33 offset:2788 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v39 /*v807*/, s33 offset:2792 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40 /*v808*/, s33 offset:2796 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41 /*v809*/, s33 offset:2800 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42 /*v810*/, s33 offset:2804 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v43 /*v811*/, s33 offset:2808 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v44 /*v812*/, s33 offset:2812 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v45 /*v813*/, s33 offset:2816 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v46 /*v814*/, s33 offset:2820 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v47 /*v815*/, s33 offset:2824 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v48 /*v816*/, s33 offset:2828 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v49 /*v817*/, s33 offset:2832 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v50 /*v818*/, s33 offset:2836 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v51 /*v819*/, s33 offset:2840 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v52 /*v820*/, s33 offset:2844 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v53 /*v821*/, s33 offset:2848 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v54 /*v822*/, 
s33 offset:2852 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v55 /*v823*/, s33 offset:2856 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v56 /*v824*/, s33 offset:2860 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v57 /*v825*/, s33 offset:2864 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v58 /*v826*/, s33 offset:2868 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v59 /*v827*/, s33 offset:2872 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v60 /*v828*/, s33 offset:2876 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v61 /*v829*/, s33 offset:2880 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v62 /*v830*/, s33 offset:2884 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v63 /*v831*/, s33 offset:2888 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v64 /*v832*/, s33 offset:2892 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v65 /*v833*/, s33 offset:2896 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v66 /*v834*/, s33 offset:2900 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v67 /*v835*/, s33 offset:2904 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v68 /*v836*/, s33 offset:2908 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v69 /*v837*/, s33 offset:2912 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v70 /*v838*/, s33 offset:2916 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v71 /*v839*/, s33 offset:2920 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v72 /*v840*/, s33 offset:2924 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v73 /*v841*/, s33 offset:2928 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v74 /*v842*/, s33 offset:2932 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v75 /*v843*/, s33 offset:2936 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v76 /*v844*/, s33 offset:2940 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v77 /*v845*/, s33 offset:2944 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v78 /*v846*/, s33 offset:2948 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v79 /*v847*/, s33 offset:2952 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v80 /*v848*/, s33 offset:2956 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v81 /*v849*/, s33 offset:2960 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v82 /*v850*/, s33 offset:2964 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v83 /*v851*/, s33 offset:2968 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v84 /*v852*/, s33 offset:2972 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v85 /*v853*/, s33 offset:2976 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v86 /*v854*/, s33 offset:2980 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v87 /*v855*/, s33 
offset:2984 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v88 /*v856*/, s33 offset:2988 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v89 /*v857*/, s33 offset:2992 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v90 /*v858*/, s33 offset:2996 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v91 /*v859*/, s33 offset:3000 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v92 /*v860*/, s33 offset:3004 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v93 /*v861*/, s33 offset:3008 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v94 /*v862*/, s33 offset:3012 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v95 /*v863*/, s33 offset:3016 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v96 /*v864*/, s33 offset:3020 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v97 /*v865*/, s33 offset:3024 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v98 /*v866*/, s33 offset:3028 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v99 /*v867*/, s33 offset:3032 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v100 /*v868*/, s33 offset:3036 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v101 /*v869*/, s33 offset:3040 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v102 /*v870*/, s33 offset:3044 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v103 /*v871*/, s33 offset:3048 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v104 /*v872*/, s33 offset:3052 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v105 /*v873*/, s33 offset:3056 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v106 /*v874*/, s33 offset:3060 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v107 /*v875*/, s33 offset:3064 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v108 /*v876*/, s33 offset:3068 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v109 /*v877*/, s33 offset:3072 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v110 /*v878*/, s33 offset:3076 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v111 /*v879*/, s33 offset:3080 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v112 /*v880*/, s33 offset:3084 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v113 /*v881*/, s33 offset:3088 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v114 /*v882*/, s33 offset:3092 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v115 /*v883*/, s33 offset:3096 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v116 /*v884*/, s33 offset:3100 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v117 /*v885*/, s33 offset:3104 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v118 /*v886*/, s33 offset:3108 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v119 /*v887*/, s33 offset:3112 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v120 /*v888*/, s33 offset:3116 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v121 /*v889*/, s33 offset:3120 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v122 /*v890*/, s33 offset:3124 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v123 /*v891*/, s33 offset:3128 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v124 /*v892*/, s33 offset:3132 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v125 /*v893*/, s33 offset:3136 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v126 /*v894*/, s33 offset:3140 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v127 /*v895*/, s33 offset:3144 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v128 /*v896*/, s33 offset:3148 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v129 /*v897*/, s33 offset:3152 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v130 /*v898*/, s33 offset:3156 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v131 /*v899*/, s33 offset:3160 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v132 /*v900*/, s33 offset:3164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v133 /*v901*/, s33 offset:3168 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v134 /*v902*/, s33 offset:3172 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v135 /*v903*/, s33 offset:3176 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v136 /*v904*/, s33 offset:3180 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v137 /*v905*/, s33 offset:3184 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v138 /*v906*/, s33 offset:3188 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v139 /*v907*/, s33 offset:3192 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v140 /*v908*/, s33 offset:3196 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v141 /*v909*/, s33 offset:3200 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v142 /*v910*/, s33 offset:3204 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v143 /*v911*/, s33 offset:3208 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v144 /*v912*/, s33 offset:3212 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v145 /*v913*/, s33 offset:3216 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v146 /*v914*/, s33 offset:3220 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v147 /*v915*/, s33 offset:3224 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v148 /*v916*/, s33 offset:3228 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v149 /*v917*/, s33 offset:3232 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v150 /*v918*/, s33 offset:3236 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v151 /*v919*/, s33 offset:3240 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v152 /*v920*/, s33 offset:3244 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v153 /*v921*/, s33 offset:3248 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v154 /*v922*/, s33 offset:3252 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v155 /*v923*/, s33 offset:3256 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v156 /*v924*/, s33 offset:3260 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v157 /*v925*/, s33 offset:3264 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v158 /*v926*/, s33 offset:3268 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v159 /*v927*/, s33 offset:3272 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v160 /*v928*/, s33 offset:3276 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v161 /*v929*/, s33 offset:3280 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v162 /*v930*/, s33 offset:3284 ; GFX1250-DAGISEL-NEXT: s_clause 0x3e ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v163 /*v931*/, s33 offset:3288 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v164 /*v932*/, s33 offset:3292 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v165 /*v933*/, s33 offset:3296 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v166 /*v934*/, s33 offset:3300 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v167 /*v935*/, s33 offset:3304 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v168 /*v936*/, s33 offset:3308 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v169 
/*v937*/, s33 offset:3312 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v170 /*v938*/, s33 offset:3316 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v171 /*v939*/, s33 offset:3320 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v172 /*v940*/, s33 offset:3324 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v173 /*v941*/, s33 offset:3328 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v174 /*v942*/, s33 offset:3332 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v175 /*v943*/, s33 offset:3336 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v176 /*v944*/, s33 offset:3340 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v177 /*v945*/, s33 offset:3344 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v178 /*v946*/, s33 offset:3348 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v179 /*v947*/, s33 offset:3352 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v180 /*v948*/, s33 offset:3356 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v181 /*v949*/, s33 offset:3360 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v182 /*v950*/, s33 offset:3364 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v183 /*v951*/, s33 offset:3368 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v184 /*v952*/, s33 offset:3372 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v185 /*v953*/, s33 offset:3376 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v186 /*v954*/, s33 offset:3380 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v187 /*v955*/, s33 offset:3384 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v188 /*v956*/, s33 offset:3388 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v189 /*v957*/, s33 offset:3392 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v190 /*v958*/, s33 offset:3396 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v191 /*v959*/, s33 offset:3400 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v192 /*v960*/, s33 offset:3404 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v193 /*v961*/, s33 offset:3408 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v194 /*v962*/, s33 offset:3412 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v195 /*v963*/, s33 offset:3416 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v196 /*v964*/, s33 offset:3420 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v197 /*v965*/, s33 offset:3424 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v198 /*v966*/, s33 offset:3428 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v199 /*v967*/, s33 offset:3432 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v200 /*v968*/, s33 offset:3436 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v201 /*v969*/, s33 offset:3440 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; 
GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v202 /*v970*/, s33 offset:3444 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v203 /*v971*/, s33 offset:3448 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v204 /*v972*/, s33 offset:3452 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v205 /*v973*/, s33 offset:3456 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v206 /*v974*/, s33 offset:3460 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v207 /*v975*/, s33 offset:3464 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v208 /*v976*/, s33 offset:3468 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v209 /*v977*/, s33 offset:3472 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v210 /*v978*/, s33 offset:3476 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v211 /*v979*/, s33 offset:3480 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v212 /*v980*/, s33 offset:3484 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v213 /*v981*/, s33 offset:3488 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v214 /*v982*/, s33 offset:3492 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v215 /*v983*/, s33 offset:3496 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v216 /*v984*/, s33 offset:3500 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v217 /*v985*/, s33 offset:3504 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, 
v218 /*v986*/, s33 offset:3508 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v219 /*v987*/, s33 offset:3512 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v220 /*v988*/, s33 offset:3516 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v221 /*v989*/, s33 offset:3520 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v222 /*v990*/, s33 offset:3524 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v223 /*v991*/, s33 offset:3528 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v224 /*v992*/, s33 offset:3532 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v225 /*v993*/, s33 offset:3536 ; GFX1250-DAGISEL-NEXT: s_clause 0x1d ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v226 /*v994*/, s33 offset:3540 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v227 /*v995*/, s33 offset:3544 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v228 /*v996*/, s33 offset:3548 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v229 /*v997*/, s33 offset:3552 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v230 /*v998*/, s33 offset:3556 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v231 /*v999*/, s33 offset:3560 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v232 /*v1000*/, s33 offset:3564 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v233 /*v1001*/, s33 offset:3568 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v234 /*v1002*/, s33 offset:3572 +; 
GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v235 /*v1003*/, s33 offset:3576 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v236 /*v1004*/, s33 offset:3580 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v237 /*v1005*/, s33 offset:3584 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v238 /*v1006*/, s33 offset:3588 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v239 /*v1007*/, s33 offset:3592 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v240 /*v1008*/, s33 offset:3596 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v241 /*v1009*/, s33 offset:3600 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v242 /*v1010*/, s33 offset:3604 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v243 /*v1011*/, s33 offset:3608 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v244 /*v1012*/, s33 offset:3612 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v245 /*v1013*/, s33 offset:3616 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v246 /*v1014*/, s33 offset:3620 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v247 /*v1015*/, s33 offset:3624 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v248 /*v1016*/, s33 offset:3628 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v249 /*v1017*/, s33 offset:3632 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v250 /*v1018*/, s33 offset:3636 +; GFX1250-DAGISEL-NEXT: ; meta 
instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v251 /*v1019*/, s33 offset:3640 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v252 /*v1020*/, s33 offset:3644 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v253 /*v1021*/, s33 offset:3648 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v254 /*v1022*/, s33 offset:3652 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, -1 ; GFX1250-DAGISEL-NEXT: s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-DAGISEL-NEXT: s_clause 0x2 ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v42, s33 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 +; GFX1250-DAGISEL-NEXT: ; meta instruction ; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 ; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x2 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 -; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 ; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0xe50 -; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 -; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 ; GFX1250-DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 +; GFX1250-DAGISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 +; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 ; GFX1250-DAGISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] ; GFX1250-DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s30, v42, 1 +; GFX1250-DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 
s4, v42, 0 ; GFX1250-DAGISEL-NEXT: v_readlane_b32 s0, v42, 3 ; GFX1250-DAGISEL-NEXT: s_clause 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll index 06c451869e841..9eea46172ce81 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll @@ -22,9 +22,9 @@ define void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: v_writelane_b32 v40, s28, 2 ; GFX90A-NEXT: v_writelane_b32 v40, s29, 3 ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 -; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s20 ; GFX90A-NEXT: ;;#ASMEND @@ -41,12 +41,12 @@ define void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 ; GFX90A-NEXT: v_accvgpr_read_b32 v39, a32 ; GFX90A-NEXT: s_mov_b64 exec, s[28:29] +; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 ; GFX90A-NEXT: v_readlane_b32 s20, v39, 0 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s20 ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 -; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 ; GFX90A-NEXT: s_mov_b32 s32, s33 ; GFX90A-NEXT: v_readlane_b32 s4, v40, 4 ; GFX90A-NEXT: v_readlane_b32 s28, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll index 9e9fe1809c780..b3ad8880b85a9 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll @@ -27,9 +27,9 @@ define void @test() #0 { ; GCN-NEXT: v_writelane_b32 v40, s28, 2 ; GCN-NEXT: v_writelane_b32 v40, s29, 3 ; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, 
s31, 1 +; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s16 ; GCN-NEXT: ;;#ASMEND @@ -49,10 +49,10 @@ define void @test() #0 { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s4, v39, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-NEXT: v_readlane_b32 s28, v40, 2 @@ -111,8 +111,8 @@ define void @test() #0 { ; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 ; GCN-O0-NEXT: global_store_dword v[0:1], v2, off ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-O0-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-O0-NEXT: s_mov_b32 s32, s33 ; GCN-O0-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-O0-NEXT: v_readlane_b32 s28, v40, 2 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index 74e9ab718c3d2..f28ceb4e0d8b7 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -387,8 +387,8 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: s_mov_b32 s32, s33 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload @@ -424,9 +424,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: 
v_mov_b32_e32 v0, v1 +; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: s_mov_b32 s32, s33 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -624,8 +624,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: s_mov_b32 s32, s33 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload @@ -685,9 +685,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: s_mov_b32 s32, s33 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload diff --git a/llvm/test/DebugInfo/AMDGPU/cfi.ll b/llvm/test/DebugInfo/AMDGPU/cfi.ll index 686cf4b654e35..c7c23bc632fe7 100644 --- a/llvm/test/DebugInfo/AMDGPU/cfi.ll +++ b/llvm/test/DebugInfo/AMDGPU/cfi.ll @@ -15,6 +15,9 @@ ; CHECK-EMPTY: ; CHECK: 00000010 {{[0-9]+}} 00000000 FDE cie=00000000 pc=00000000...{{[0-9]+}} ; CHECK-NEXT: Format: DWARF32 +; CHECK-NEXT: DW_CFA_LLVM_def_aspace_cfa: SGPR32 +0 in addrspace6 +; CHECK-NEXT: DW_CFA_expression: PC_REG DW_OP_regx SGPR30, DW_OP_piece 0x4, DW_OP_regx SGPR31, 
DW_OP_piece 0x4 +; CHECK-NEXT: DW_CFA_nop: ; CHECK-EMPTY: ; CHECK: .eh_frame contents: ; CHECK-NOT: CIE diff --git a/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll b/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll index 1f13282a1f04c..a87ce1c79055a 100644 --- a/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll +++ b/llvm/test/DebugInfo/AMDGPU/debug-loc-copy.ll @@ -4,6 +4,8 @@ ; Verify that the debug locations in this function are correct, in particular ; that the location for %cast doesn't appear in the block of %lab. + + define void @_Z12lane_pc_testj() #0 !dbg !9 { ; GCN-LABEL: _Z12lane_pc_testj: ; GCN: .Lfunc_begin0: @@ -12,6 +14,16 @@ define void @_Z12lane_pc_testj() #0 !dbg !9 { ; GCN-NEXT: .cfi_sections .debug_frame ; GCN-NEXT: .cfi_startproc ; GCN-NEXT: ; %bb.0: +; GCN-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; GCN-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; GCN-NEXT: .cfi_undefined 1536 +; GCN-NEXT: .cfi_undefined 1537 +; GCN-NEXT: .cfi_undefined 1538 +; GCN-NEXT: .cfi_undefined 36 +; GCN-NEXT: .cfi_undefined 37 +; GCN-NEXT: .cfi_undefined 38 +; GCN-NEXT: .cfi_undefined 39 +; GCN-NEXT: .cfi_undefined 40 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: ; %bb.1: ; %lab ; GCN-NEXT: s_mov_b64 s[4:5], 0 diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s new file mode 100644 index 0000000000000..d742cfc49689c --- /dev/null +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-cfi-directives.s @@ -0,0 +1,57 @@ +; RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj %s | llvm-dwarfdump -debug-frame - | FileCheck %s + +.text +.cfi_sections .debug_frame + +; CHECK-NOT: DW_CFA_expression + +register_pair: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: PC_REG DW_OP_regx SGPR30, DW_OP_piece 0x4, DW_OP_regx SGPR31, DW_OP_piece 0x4 + .cfi_llvm_register_pair 16, 62, 32, 63, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + 
+vector_registers: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: PC_REG DW_OP_regx 0x67f, DW_OP_bit_piece 0x20 0x0, DW_OP_regx 0x67f, DW_OP_bit_piece 0x20 0x20 + .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_registers_single: + .cfi_startproc + s_nop 2 + ;; Note that 0x2c below is the offset in the VGPR, so 4 (bytes, vgpr lane size) * 11 (the lane). + ; CHECK: DW_CFA_expression: SGPR45 DW_OP_regx VGPR41, DW_OP_LLVM_user DW_OP_LLVM_offset_uconst 0x2c + .cfi_llvm_vector_registers 77, 2601, 11, 32 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_offsets: + .cfi_startproc + s_nop 2 + ; CHECK: DW_CFA_expression: VGPR40 DW_OP_regx VGPR40, DW_OP_swap, DW_OP_LLVM_user DW_OP_LLVM_offset_uconst 0x100, DW_OP_LLVM_user DW_OP_LLVM_call_frame_entry_reg EXEC, DW_OP_deref_size 0x8, DW_OP_LLVM_user DW_OP_LLVM_select_bit_piece 0x20 0x40 + .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 + s_nop 2 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression + +vector_register_mask: + .cfi_startproc + s_nop 0 + ; CHECK: DW_CFA_expression: VGPR40 DW_OP_regx VGPR40, DW_OP_regx AGPR0, DW_OP_LLVM_user DW_OP_LLVM_call_frame_entry_reg EXEC, DW_OP_deref_size 0x8, DW_OP_LLVM_user DW_OP_LLVM_select_bit_piece 0x20 0x40 + .cfi_llvm_vector_register_mask 2600, 3072, 32, 17, 64 + s_nop 0 + .cfi_endproc + +; CHECK-NOT: DW_CFA_expression diff --git a/llvm/test/MC/ELF/cfi-register-pair.s b/llvm/test/MC/ELF/cfi-register-pair.s new file mode 100644 index 0000000000000..05ef8e9ae2a4d --- /dev/null +++ b/llvm/test/MC/ELF/cfi-register-pair.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_register_pair 16, 
62, 32, 63, 32 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 56 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// CHECK-NEXT: 0010: 1B000000 20000000 18000000 00000000 |.... ...........| +// CHECK-NEXT: 0020: 08000000 00411010 08903E93 04903F93 |.....A....>...?.| +// CHECK-NEXT: 0030: 04000000 00000000 |........| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } diff --git a/llvm/test/MC/ELF/cfi-vector-offset.s b/llvm/test/MC/ELF/cfi-vector-offset.s new file mode 100644 index 0000000000000..7817396b8f316 --- /dev/null +++ b/llvm/test/MC/ELF/cfi-vector-offset.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_vector_offset 2600, 32, 17, 64, 256 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: 
SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 64 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// CHECK-NEXT: 0010: 1B000000 28000000 18000000 00000000 |....(...........| +// CHECK-NEXT: 0020: 08000000 004110A8 141190A8 1416E905 |.....A..........| +// CHECK-NEXT: 0030: 8002E907 119408E9 0C204000 00000000 |......... @.....| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } diff --git a/llvm/test/MC/ELF/cfi-vector-registers.s b/llvm/test/MC/ELF/cfi-vector-registers.s new file mode 100644 index 0000000000000..76f001007a272 --- /dev/null +++ b/llvm/test/MC/ELF/cfi-vector-registers.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple amdgcn-amd-amdhsa %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple amdgcn-amd-amdhsa -mcpu=gfx908 %s | llvm-readobj -S --sr --sd - | FileCheck %s + +# REQUIRES: amdgpu-registered-target + +# ASM: .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 +# ASM-NEXT: s_nop 0 + +f: + .cfi_startproc + s_nop 0 + .cfi_llvm_vector_registers 16, 1663, 0, 32, 1663, 1, 32 + s_nop 0 + .cfi_endproc + +// CHECK: Section { +// CHECK: Index: +// CHECK: Name: .eh_frame +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC 
+// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: 0x48 +// CHECK-NEXT: Size: 56 +// CHECK-NEXT: Link: 0 +// CHECK-NEXT: Info: 0 +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 0 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 0000: 10000000 00000000 017A5200 04041001 |.........zR.....| +// CHECK-NEXT: 0010: 1B000000 20000000 18000000 00000000 |.... ...........| +// CHECK-NEXT: 0020: 08000000 00411010 0C90FF0C 9D200090 |.....A....... ..| +// CHECK-NEXT: 0030: FF0C9D20 20000000 |... ...| +// CHECK-NEXT: ) +// CHECK-NEXT: } +// CHECK-NEXT: Section { +// CHECK-NEXT: Index: +// CHECK-NEXT: Name: .rela.eh_frame +// CHECK-NEXT: Type: SHT_RELA +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_INFO_LINK +// CHECK-NEXT: ] +// CHECK-NEXT: Address: 0x0 +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 24 +// CHECK-NEXT: Link: +// CHECK-NEXT: Info: +// CHECK-NEXT: AddressAlignment: 8 +// CHECK-NEXT: EntrySize: 24 +// CHECK-NEXT: Relocations [ +// CHECK-NEXT: 0x1C R_AMDGPU_REL32 .text +// CHECK-NEXT: ] +// CHECK: } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected index a8c2531117f42..0a85133152679 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected @@ -69,9 +69,22 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .type .Lcheck_boundaries$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 
2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s8, s33 +; CHECK-NEXT: .cfi_register 65, 40 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 @@ -102,6 +115,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -111,9 +125,21 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .type .Lmain$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 +; CHECK-NEXT: .cfi_register 65, 38 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] @@ -139,6 +165,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: 
s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected index 34530f2f632e2..df156b1b2e1b4 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected @@ -10,9 +10,22 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: .type .Lcheck_boundaries$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 +; CHECK-NEXT: .cfi_undefined 38 +; CHECK-NEXT: .cfi_undefined 39 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s8, s33 +; CHECK-NEXT: .cfi_register 65, 40 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 @@ -43,6 +56,7 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s8 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -88,9 +102,21 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: .type .Lmain$local,@function ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: .cfi_llvm_def_aspace_cfa 64, 0, 
6 +; CHECK-NEXT: .cfi_llvm_register_pair 16, 62, 32, 63, 32 +; CHECK-NEXT: .cfi_undefined 2560 +; CHECK-NEXT: .cfi_undefined 2561 +; CHECK-NEXT: .cfi_undefined 2562 +; CHECK-NEXT: .cfi_undefined 2563 +; CHECK-NEXT: .cfi_undefined 2564 +; CHECK-NEXT: .cfi_undefined 2565 +; CHECK-NEXT: .cfi_undefined 36 +; CHECK-NEXT: .cfi_undefined 37 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 +; CHECK-NEXT: .cfi_register 65, 38 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: .cfi_def_cfa_register 65 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] @@ -116,6 +142,7 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: s_mov_b32 s32, s33 +; CHECK-NEXT: .cfi_def_cfa_register 64 ; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31]